internal/relui: modify advisory trybots
This change adds retries to each advisory trybot run. A trybot run is
attempted up to 3 times before it is considered failed. If it still
fails, the operator is given the option to approve the failed trybot
run. A follow-up task waits for all advisory trybot runs to either
pass or be approved before the workflow continues.
Fixes golang/go#57725
Change-Id: I2250289f5d597c7cb493d0267e451691548589c7
Reviewed-on: https://go-review.googlesource.com/c/build/+/463535
Run-TryBot: Carlos Amedee <carlos@golang.org>
Reviewed-by: Carlos Amedee <carlos@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
diff --git a/internal/relui/buildrelease_test.go b/internal/relui/buildrelease_test.go
index d9d9a4a..9c62bea 100644
--- a/internal/relui/buildrelease_test.go
+++ b/internal/relui/buildrelease_test.go
@@ -433,7 +433,7 @@
defaultApprove := deps.buildTasks.ApproveAction
approvedTrybots := false
deps.buildTasks.ApproveAction = func(ctx *workflow.TaskContext) error {
- if strings.Contains(ctx.TaskName, "TryBot failures") {
+ if strings.Contains(ctx.TaskName, "Run advisory TryBot") {
approvedTrybots = true
return nil
}
diff --git a/internal/relui/workflows.go b/internal/relui/workflows.go
index f03db11..47a3226 100644
--- a/internal/relui/workflows.go
+++ b/internal/relui/workflows.go
@@ -16,6 +16,7 @@
"math/rand"
"net/http"
"path"
+ "sort"
"strings"
"sync"
"time"
@@ -448,7 +449,7 @@
result := wf.Task3(wd, "Run advisory TryBot "+bc.Name, tasks.runAdvisoryTryBot, wf.Const(bc), skipTests, source)
advisoryResults = append(advisoryResults, result)
}
- tryBotsApproved := wf.Action1(wd, "Approve any TryBot failures", tasks.checkAdvisoryTrybots, wf.Slice(advisoryResults...))
+ tryBotsApproved := wf.Action1(wd, "Wait for advisory TryBots", tasks.checkAdvisoryTrybots, wf.Slice(advisoryResults...))
signedAndTested := wf.Task2(wd, "Wait for signing and tests", func(ctx *wf.TaskContext, artifacts []artifact, version string) ([]artifact, error) {
// Note: Note this needs to happen somewhere, doesn't matter where. Maybe move it to a nicer place later.
@@ -761,14 +762,23 @@
return tryBotResult{bc.Name, true}, nil
}
}
-
passed := false
- _, err := b.runBuildStep(ctx, nil, bc, source, "", func(bs *task.BuildletStep, r io.Reader, w io.Writer) error {
- var err error
- passed, err = bs.RunTryBot(ctx, r)
- return err
- })
- return tryBotResult{bc.Name, passed}, err
+ for attempt := 1; attempt <= workflow.MaxRetries && !passed; attempt++ {
+ ctx.Printf("======== Trybot Attempt %d of %d ========\n", attempt, workflow.MaxRetries)
+ _, err := b.runBuildStep(ctx, nil, bc, source, "", func(bs *task.BuildletStep, r io.Reader, w io.Writer) error {
+ var err error
+ passed, err = bs.RunTryBot(ctx, r)
+ return err
+ })
+ if err != nil {
+ ctx.Printf("Trybot Attempt failed: %v\n", err)
+ }
+ }
+ if !passed {
+ ctx.Printf("Advisory TryBot failed. Check the logs and approve this task if it's okay:\n")
+ return tryBotResult{bc.Name, passed}, b.ApproveAction(ctx)
+ }
+ return tryBotResult{bc.Name, passed}, nil
}
func (b *BuildReleaseTasks) checkAdvisoryTrybots(ctx *wf.TaskContext, results []tryBotResult) error {
@@ -778,11 +788,12 @@
fails = append(fails, r.Name)
}
}
- if len(fails) == 0 {
+ if len(fails) != 0 {
+ sort.Strings(fails)
+ ctx.Printf("Some advisory TryBots failed and their failures have been approved:\n%v", strings.Join(fails, "\n"))
return nil
}
- ctx.Printf("Some advisory TryBots failed. Check their logs and approve this task if it's okay:\n%v", strings.Join(fails, "\n"))
- return b.ApproveAction(ctx)
+ return nil
}
// runBuildStep is a convenience function that manages resources a build step might need.