blob: 66b0eb72ec467cbfbc517fb62b98776c18ddc953 [file] [log] [blame]
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bisect is used for bisecting a target repository
// with the goal of finding a commit introducing a regression.
package bisect
import (
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"log/slog"
"time"
"golang.org/x/oscar/internal/github"
"golang.org/x/oscar/internal/queue"
"golang.org/x/oscar/internal/storage"
"golang.org/x/oscar/internal/storage/timed"
"rsc.io/ordered"
)
// Kinds of database entries written by this package.
const (
	taskKind       = "bisection.Task"       // first component of the key under which a Task is stored and locked
	taskUpdateKind = "bisection.TaskUpdate" // used for storing task updates in a timed db
)
// This package stores the following key schemas in the database:
//
// ["bisection.Task", ID] => JSON of Task structure
// ["bisection.TaskUpdateByTime", DBTime, ID] => []
//
// Bisecting a repository for a change regression can take considerable
// time. This has an effect on how the bisection is run in gaby. If
// bisection is being run as part of a batch job, other jobs will be
// blocked by the bisection. Spawning a bisection in a goroutine
// or a process will in principle not work on Cloud Run, which can
// move or kill a gaby instance if there are no requests served [1],
// even if several bisections are being run in the background.
//
// This package addresses this problem by asynchronous bisection.
// [Client.BisectAsync] spawns a bisection [Task] by sending it to
// a [queue.Queue], which in practice will be a Cloud Tasks [2]
// queue. The latter will then send a request to gaby, which in
// turn will call [Client.Bisect]. The results and partial progress
// of bisection are saved to the provided database.
//
// [1] https://cloud.google.com/run/docs/about-instance-autoscaling
// [2] https://cloud.google.com/tasks/docs
// o is shorthand for ordered.Encode: it returns the
// ordered encoding of list. It exists to keep the
// construction of database keys terse.
func o(list ...any) []byte {
	return ordered.Encode(list...)
}
// A Client is responsible for dispatching
// and executing bisection tasks.
type Client struct {
	slog  *slog.Logger // logger for bisection events
	db    storage.DB   // stores tasks and their update records; also used for per-task locking
	queue queue.Queue  // receives the tasks enqueued by BisectAsync
}
// New creates a bisection [Client] that logs to lg,
// keeps its tasks in db, and dispatches them through q.
func New(lg *slog.Logger, db storage.DB, q queue.Queue) *Client {
	c := new(Client)
	c.slog = lg
	c.db = db
	c.queue = q
	return c
}
// BisectAsync creates and spawns a bisection task for trigger.
//
// Only triggers from the golang/go project are currently supported;
// for any other project BisectAsync returns an error without creating
// a task. The whole body of trigger is used as the regression code.
//
// BisectAsync creates a [Task] and sends it to the [Client] queue,
// which is expected to trigger an asynchronous execution of
// [Client.Bisect]. The task is saved to the database, with status
// [StatusQueued], only if the queue reports it as enqueued.
// BisectAsync returns the error reported by the queue, if any.
//
// TODO: generalize trigger beyond GitHub issue comment.
func (c *Client) BisectAsync(ctx context.Context, trigger *github.IssueComment) error {
	if project := trigger.Project(); project != "golang/go" {
		return fmt.Errorf("bisect.BisectAsync: only golang/go repo currently supported, got '%s'", project)
	}

	now := time.Now()
	t := &Task{
		Trigger:    trigger.URL,
		Issue:      trigger.IssueURL,
		Repository: "https://go.googlesource.com/go",
		Bad:        "master",
		Good:       "go1.22.0",
		Regression: regression(trigger.Body),
		Created:    now,
		Updated:    now,
	}
	t.ID = newTaskID(t)

	skey := string(o(taskKind, t.ID))
	// Lock the task for sanity.
	// This also helps with testing
	// when enqueued bisection starts
	// before BisectAsync saves the
	// task to the database.
	c.db.Lock(skey)
	defer c.db.Unlock(skey)

	ok, err := c.queue.Enqueue(ctx, t, &queue.Options{})
	c.slog.Info("bisect.BisectAsync: enqueueing bisection task", "id", t.ID, "issue", t.Issue, "enqueued", ok)
	if ok {
		// Save the task only if it is enqueued.
		t.Status = StatusQueued
		c.save(t)
	}
	return err
}
// regression returns the bisection test
// code contained in body.
func regression(body string) string {
	// The body is currently assumed to consist solely
	// of the regression code, so it is returned as is.
	code := body
	return code
}
// newTaskID creates an ID for t as the hex-encoded SHA-256
// hash of its trigger, repository, issue, good and bad
// commits, and regression code.
//
// Each field is hashed with a length prefix so that field
// boundaries are unambiguous: tasks that differ only in how
// the same text is split across adjacent fields get
// different IDs.
func newTaskID(t *Task) string {
	hasher := sha256.New()
	for _, field := range []string{
		t.Trigger,
		t.Repository,
		t.Issue,
		t.Good,
		t.Bad,
		t.Regression,
	} {
		// Writes to a hash.Hash never return an error.
		fmt.Fprintf(hasher, "%d:", len(field))
		io.WriteString(hasher, field)
	}
	return hex.EncodeToString(hasher.Sum(nil))
}
// task looks up the [Task] stored in the database under id.
// It returns nil, nil if the database has no such task, and
// a non-nil error if the stored value cannot be decoded as
// the JSON of a Task.
func (c *Client) task(id string) (*Task, error) {
	data, found := c.db.Get(o(taskKind, id))
	if !found {
		return nil, nil
	}

	stored := new(Task)
	if err := json.Unmarshal(data, stored); err != nil {
		return nil, err
	}
	return stored, nil
}
// save writes the JSON of t to the database under its task
// key and records a timed update entry for t.ID, using a
// single batch. The batch is applied and the database is
// flushed before save returns.
func (c *Client) save(t *Task) {
	batch := c.db.Batch()
	batch.Set(o(taskKind, t.ID), storage.JSON(t))
	timed.Set(c.db, batch, taskUpdateKind, o(t.ID), nil)
	batch.Apply()
	c.db.Flush()
}
// Bisect performs bisection on the task with the given id.
//
// The task is locked in the database for the duration of the call.
// Bisect returns an error if the task cannot be loaded from the
// database, wrapping the underlying error, or if no task with that
// id exists. The bisection logic itself is not implemented yet, so
// Bisect otherwise returns nil.
func (c *Client) Bisect(id string) error {
	skey := string(o(taskKind, id))
	// Lock the task just in case, so that
	// no one else is bisecting it concurrently.
	c.db.Lock(skey)
	defer c.db.Unlock(skey)

	t, err := c.task(id)
	if err != nil {
		// Wrap with %w so that callers can inspect the cause.
		return fmt.Errorf("bisect.Bisect: task could not be loaded id=%s: %w", id, err)
	}
	if t == nil {
		return fmt.Errorf("bisect.Bisect: task could not be found id=%s", id)
	}

	// TODO: implement bisection logic

	// TODO: if a task with the t.ID already exists and it has been more
	// than cloud-task-deadline minutes since the task has been updated,
	// assume the task was killed and restart the task from where it
	// stopped the last time it was updated?
	return nil
}