blob: ffeb9185728428f4612fcad2d9e17617a9e6b4d2 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bisect is used for bisecting a target repository
// with the goal of finding a commit introducing a regression.
package bisect
import (
"bufio"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
"regexp"
"sync"
"time"
"golang.org/x/oscar/internal/queue"
"golang.org/x/oscar/internal/repo"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/ordered"
)
// taskKind is the kind under which the JSON
// encoding of a Task is stored in the database.
const taskKind = "bisection.Task"

// taskUpdateKind is the kind handed to the timed
// package whenever a task is saved, so that task
// updates are also recorded in a timed db.
const taskUpdateKind = "bisection.TaskUpdate"
// This package stores the following key schemas in the database:
//
// ["bisection.Task", ID] => JSON of Task structure
// ["bisection.TaskUpdateByTime", DBTime, ID] => []
//
// Bisecting a repository for a change regression can take considerable
// time. This has an effect on how the bisection is run in gaby. If
// bisection is being run as part of a batch job, other jobs will be
// blocked by the bisection. Spawning a bisection in a goroutine
// or a process will in principle not work on Cloud Run, which can
// move or kill a gaby instance if there are no requests served [1],
// even if several bisections are being run in the background.
//
// This package addresses this problem by asynchronous bisection.
// [Client.BisectAsync] spawns a bisection [Task] by sending it to
// a [queue.Queue], which in practice will be a Cloud Tasks [2]
// queue. The latter will then send a request to gaby, which in
// turn will call [Client.Bisect]. The results and partial progress
// of bisection are saved to the provided database.
//
// [1] https://cloud.google.com/run/docs/about-instance-autoscaling
// [2] https://cloud.google.com/tasks/docs
// o is a terse alias for [ordered.Encode]: it returns
// the ordered encoding of the given values. It is used
// to build database keys and lock names.
func o(parts ...any) []byte {
	encoded := ordered.Encode(parts...)
	return encoded
}
// A Client is responsible for dispatching
// and executing bisection tasks.
//
// Tasks are dispatched with [Client.BisectAsync] and
// executed with [Client.Bisect]. Use [New] to create a Client.
type Client struct {
// slog receives progress and diagnostic messages.
slog *slog.Logger
// db stores the tasks and the records of their updates.
db storage.DB
// queue is where BisectAsync enqueues tasks
// for asynchronous execution.
queue queue.Queue
// testMu presumably guards testClient; confirm against
// the code that sets testClient, which is not shown here.
testMu sync.Mutex
// testClient, when bisection is diverted for testing,
// supplies the output recorded for a task in place
// of the output of a real bisection run.
testClient *TestingClient
}
// New creates a bisection [Client] that logs to lg,
// keeps its tasks in db, and dispatches asynchronous
// bisections through q.
func New(lg *slog.Logger, db storage.DB, q queue.Queue) *Client {
	c := new(Client)
	c.slog = lg
	c.db = db
	c.queue = q
	return c
}
// BisectAsync turns the bisection request breq into a
// [Task] and schedules it for asynchronous execution.
//
// The task is handed to the client's queue, which is
// expected to eventually trigger [Client.Bisect] for it.
// Only when the queue reports the task as enqueued is
// the task marked [StatusQueued] and saved to the
// database. BisectAsync returns the error reported
// by the queue, if any.
func (c *Client) BisectAsync(ctx context.Context, breq *Request) error {
	created := time.Now()
	task := &Task{
		Created:    created,
		Updated:    created,
		Trigger:    breq.Trigger,
		Issue:      breq.Issue,
		Repository: breq.Repo,
		Good:       breq.Pass,
		Bad:        breq.Fail,
		Regression: breq.Body,
	}
	task.ID = newTaskID(task)

	// Hold the task's lock across enqueueing and saving.
	// Bisect takes the same lock, so an enqueued bisection
	// that starts right away (as can happen in tests) waits
	// until the task has been saved to the database.
	lock := string(o(taskKind, task.ID))
	c.db.Lock(lock)
	defer c.db.Unlock(lock)

	enqueued, err := c.queue.Enqueue(ctx, task, &queue.Options{})
	c.slog.Info("bisect.BisectAsync: enqueueing bisection task", "id", task.ID, "issue", task.Issue, "enqueued", enqueued)
	if !enqueued {
		// A task that was not enqueued is not saved.
		return err
	}
	task.Status = StatusQueued
	c.save(task)
	return err
}
// newTaskID derives the ID of t: the hex encoding of the
// SHA-256 hash of the task's trigger, repository, issue,
// good commit, bad commit, and regression, in that order.
//
// NOTE(review): the fields are hashed back to back with
// no separator between them, so two tasks whose fields
// concatenate to the same string get the same ID.
func newTaskID(t *Task) string {
	h := sha256.New()
	fields := []string{
		t.Trigger,
		t.Repository,
		t.Issue,
		t.Good,
		t.Bad,
		t.Regression,
	}
	for _, f := range fields {
		io.WriteString(h, f)
	}
	return hex.EncodeToString(h.Sum(nil))
}
// task looks up the [Task] saved under id in the database.
// It returns (nil, nil) if there is no such task, and a
// non-nil error if the saved data cannot be decoded as JSON.
func (c *Client) task(id string) (*Task, error) {
	data, found := c.db.Get(o(taskKind, id))
	if !found {
		return nil, nil
	}
	t := new(Task)
	if err := json.Unmarshal(data, t); err != nil {
		return nil, err
	}
	return t, nil
}
// save writes t to the database. In a single batch it
// stores the JSON encoding of t under the task's key and
// records the update of t via the timed package. It then
// applies the batch and flushes the database.
func (c *Client) save(t *Task) {
	batch := c.db.Batch()
	batch.Set(o(taskKind, t.ID), storage.JSON(t))
	timed.Set(c.db, batch, taskUpdateKind, o(t.ID), nil)
	batch.Apply()
	c.db.Flush()
}
// Bisect performs bisection on the task with the given id.
//
// The task must already be saved in the database, as done
// by [Client.BisectAsync]. Only tasks in [StatusQueued] or
// [StatusFailed] are run; for a task in any other status
// Bisect does nothing and returns nil.
//
// Bisect marks the task as [StatusStarted], runs the
// bisection in a fresh temporary directory, and saves the
// outcome: [StatusSucceeded] together with the commit
// extracted from the bisection output, or [StatusFailed]
// together with the error text.
//
// Bisect returns an error if the task cannot be loaded,
// if the temporary directory cannot be created, or if the
// bisection itself fails.
func (c *Client) Bisect(ctx context.Context, id string) error {
	skey := string(o(taskKind, id))
	// Lock the task just in case, so that
	// no one else is bisecting it concurrently.
	c.db.Lock(skey)
	defer c.db.Unlock(skey)

	t, err := c.task(id)
	if err != nil || t == nil {
		return fmt.Errorf("bisect.Bisect: task could not be found id=%s err=%v", id, err)
	}

	// Handle retries.
	if t.Status != StatusQueued && t.Status != StatusFailed {
		// Cloud Tasks will issue a retry after a deadline
		// but it will not cancel an existing request. We
		// skip such a retry attempt. For more info, see
		// https://cloud.google.com/tasks/docs/dual-overview.
		c.slog.Info("bisect.Bisect skipping retry", "id", id)
		return nil
	}
	// TODO: If it has been more than cloud-task-deadline minutes
	// since the task has been updated, assume the task was killed
	// and restart the task from where it stopped the last time it
	// was updated?

	dir, err := os.MkdirTemp("", "bisect")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dir)

	c.slog.Info("bisect.Bisect started", "id", id)
	t.Status = StatusStarted
	// A failed task is run again. Forget the error of the
	// earlier attempt so that it is neither saved with nor
	// logged for this attempt if this attempt succeeds.
	t.Error = ""
	c.save(t)

	err = c.bisect(ctx, dir, t)
	if err != nil {
		t.Status = StatusFailed
		t.Error = err.Error()
	} else {
		t.Status = StatusSucceeded
		t.Commit = commitHash(t.Output)
	}
	c.save(t)
	c.slog.Info("bisect.Bisect finished", "id", id, "err", t.Error, "commit", t.Commit)
	return err
}
// bisectScript is a template that compiles a Go repo
// and runs a task regression in a gvisor sandbox against
// the repo. It should be instantiated with a unique
// sandbox identifier.
//
// The template has a single %s verb, which completes the
// sandbox name "sandbox%s" passed to runsc; bisect fills
// it in with the task ID and passes the resulting script
// to "git bisect run".
//
// If make.bash fails, the script exits with status 125,
// which git bisect run takes to mean that the current
// commit cannot be tested and should be skipped.
const bisectScript = `#!/bin/bash -eu
# This script expects that it is in the target go repository
# that sits at the top of bundle/rootfs/.
# Build go.
git clean -df
cd src
./make.bash || exit 125
# Go back to bundle and run the sandbox. The sandbox reads
# data from the bundle/rootfs but it cannot make changes
# to the directory visible outside of the sandbox.
cd ../../../
/usr/local/bin/runsc --ignore-cgroups --network=none --platform=systrap run sandbox%s
`
// bisect runs the bisection for t, using dir as scratch space.
//
// It expects gvisor's bisect_config.json and the sandbox
// entry point bisect_runner to be present in the current
// working directory. Inside dir it creates this layout:
//
//	bundle/config.json               copy of bisect_config.json
//	bundle/rootfs/regression_test.go the regression of t
//	bundle/rootfs/bisect.sh          bisectScript instantiated with t.ID
//	bundle/rootfs/bisect_runner      copy of bisect_runner
//	bundle/rootfs/go-bisect/         clone of t.Repository
//
// and then runs "git bisect start" and "git bisect run"
// inside the clone. The standard output of the git commands
// is accumulated in t.Output by run.
//
// When bisection is diverted for testing, none of the above
// happens: the output of the testing client is saved as the
// output of t instead.
func (c *Client) bisect(ctx context.Context, dir string, t *Task) error {
	if c.divertBisect() {
		// TODO: can we test this better?
		t.Output = c.testClient.Output
		c.save(t)
		return nil
	}

	var (
		// bundleDir is the place from which
		// the sandbox must be created.
		bundleDir = filepath.Join(dir, "bundle")
		// rootDir is the root of the sandbox execution.
		rootDir = filepath.Join(bundleDir, "rootfs")
		// cloneDir is where the repository is cloned to.
		cloneDir = filepath.Join(rootDir, "go-bisect")
	)

	err := os.Mkdir(bundleDir, 0o777)
	if err != nil {
		return err
	}
	// The sandbox configuration must be named config.json.
	err = cp("bisect_config.json", filepath.Join(bundleDir, "config.json"))
	if err != nil {
		return err
	}
	err = os.Mkdir(rootDir, 0o777)
	if err != nil {
		return err
	}
	// The regression becomes the regression test in the sandbox root.
	err = os.WriteFile(filepath.Join(rootDir, "regression_test.go"), []byte(t.Regression), 0o666)
	if err != nil {
		return err
	}
	// The bisection script is specialized for this task.
	script := fmt.Sprintf(bisectScript, t.ID)
	err = os.WriteFile(filepath.Join(rootDir, "bisect.sh"), []byte(script), 0o750)
	if err != nil {
		return err
	}
	// bisect_runner is the entry point to the sandbox.
	err = cp("bisect_runner", filepath.Join(rootDir, "bisect_runner"))
	if err != nil {
		return err
	}
	c.slog.Info("bisect.Bisect: created and copied all the scripts", "id", t.ID, "bundle", bundleDir, "rootfs", rootDir)

	// git runs a git command in workDir, or in the current
	// directory if workDir is empty, saving its output to t.
	git := func(workDir string, args ...string) error {
		cmd := exec.CommandContext(ctx, "git", args...)
		cmd.Dir = workDir
		return run(cmd, t, c)
	}

	// Clone the repository, borrowing objects from the shared
	// clone, which is released as soon as git clone is done.
	err = os.Mkdir(cloneDir, 0o777)
	if err != nil {
		return err
	}
	goRepo, err := repo.Clone(ctx, c.slog, t.Repository, nil)
	if err != nil {
		return err
	}
	err = git("", "clone", "--reference="+goRepo.Dir(), "--dissociate", t.Repository, cloneDir)
	goRepo.Release()
	if err != nil {
		return err
	}
	c.slog.Info("bisect.Bisect: cloned the go repo", "id", t.ID, "dir", cloneDir)

	// Tell git the bad and the good commit.
	if err := git(cloneDir, "bisect", "start", t.Bad, t.Good); err != nil {
		return err
	}
	c.slog.Info("bisect.Bisect: initialized git bisect", "id", t.ID)

	// Let git drive the bisection with the script.
	return git(cloneDir, "bisect", "run", "../bisect.sh")
}
// run starts cmd and waits for it to finish, while
// simultaneously reading cmd's standard output line by
// line and appending each line to t.Output. After every
// line, t.Updated is set to the current time and t is
// saved through c, so that partial progress is visible
// in the database.
//
// run returns an error if cmd cannot be started, if
// reading its output fails, or if cmd itself fails.
// In every case in which cmd was started, it has been
// waited for by the time run returns.
func run(cmd *exec.Cmd, t *Task, c *Client) error {
	// TODO: also read from stderr.
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		return err
	}

	// TODO: avoid using scanner and appending
	// strings for commands with large output
	// with long lines.
	scanner := bufio.NewScanner(stdout)
	for scanner.Scan() {
		t.Output += scanner.Text() + "\n"
		t.Updated = time.Now()
		c.save(t)
	}
	if err := scanner.Err(); err != nil {
		// Reading stopped before the end of the output, for
		// instance because a line was too long for the scanner.
		// Nobody is reading the pipe anymore, so cmd could
		// block on writing to it forever. Kill cmd and wait
		// for it so that its resources are released.
		// The errors are ignored on purpose: cmd may have
		// exited already, and the read error is what matters.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
		return err
	}
	return cmd.Wait()
}
// cp copies the contents of the file src to the file dst.
// The whole of src is read into memory and then written
// to dst with [os.WriteFile], passing the file mode of
// src as the permissions to create dst with.
// It returns the first error encountered, in which
// case dst is left untouched unless writing it failed.
func cp(src, dst string) error {
	fi, statErr := os.Stat(src)
	if statErr != nil {
		return statErr
	}
	data, readErr := os.ReadFile(src)
	if readErr != nil {
		return readErr
	}
	mode := fi.Mode()
	return os.WriteFile(dst, data, mode)
}
// badCommitRegexp matches the text with which git bisect
// announces its result. Its only group captures whatever
// precedes " is the first bad commit" on the same line,
// which for git bisect output is the hash of the commit.
var badCommitRegexp = regexp.MustCompile("(.+) is the first bad commit")

// commitHash returns the hash of the first bad commit
// reported in output, the output of git bisect.
// It returns the empty string unless output contains
// exactly one such report.
func commitHash(output string) string {
	found := badCommitRegexp.FindAllStringSubmatch(output, -1)
	// Each match consists of the full match and the captured hash.
	if len(found) == 1 && len(found[0]) == 2 {
		return found[0][1]
	}
	return ""
}