| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // +build go1.13 |
| // +build linux darwin |
| |
| package main |
| |
| import ( |
| "context" |
| "log" |
| "sync" |
| "time" |
| |
| "golang.org/x/build/buildlet" |
| "golang.org/x/build/cmd/coordinator/spanlog" |
| "golang.org/x/build/dashboard" |
| "golang.org/x/build/internal/buildgo" |
| ) |
| |
| // useScheduler controls whether we actually use the scheduler. This |
| // is temporarily false during development. Once we're happy with it |
| // we'll delete this const. |
| // |
| // If false, any GetBuildlet call to the schedule delegates directly |
| // to the BuildletPool's GetBuildlet and we make a bunch of callers |
| // fight over a mutex and a random one wins, like we used to do it. |
| const useScheduler = false |
| |
| // The Scheduler prioritizes access to buidlets. It accepts requests |
| // for buildlets, starts the creation of buildlets from BuildletPools, |
| // and prioritizes which callers gets them first when they're ready. |
| type Scheduler struct { |
| // mu guards waiting and hostsCreating. |
| mu sync.Mutex |
| |
| // waiting contains all the set of callers who are waiting for |
| // a buildlet, keyed by the host type they're waiting for. |
| waiting map[string]map[*SchedItem]bool // hostType -> item -> true |
| |
| // hostsCreating is the number of GetBuildlet calls currently in flight |
| // to each hostType's respective buildlet pool. |
| hostsCreating map[string]int // hostType -> count |
| } |
| |
| // A getBuildletResult is a buildlet that was just created and is up and |
| // is ready to be assigned to a caller based on priority. |
| type getBuildletResult struct { |
| Pool BuildletPool |
| HostType string |
| |
| // One of Client or Err gets set: |
| Client *buildlet.Client |
| Err error |
| } |
| |
| // NewScheduler returns a new scheduler. |
| func NewScheduler() *Scheduler { |
| s := &Scheduler{ |
| hostsCreating: make(map[string]int), |
| waiting: make(map[string]map[*SchedItem]bool), |
| } |
| return s |
| } |
| |
| // matchBuildlet matches up a successful getBuildletResult to the |
| // highest priority waiter, or closes it if there is none. |
| func (s *Scheduler) matchBuildlet(res getBuildletResult) { |
| if res.Err != nil { |
| go s.schedule() |
| return |
| } |
| for { |
| waiter, ok := s.matchWaiter(res.HostType) |
| if !ok { |
| log.Printf("sched: no waiter for buildlet of type %q; closing", res.HostType) |
| go res.Client.Close() |
| return |
| } |
| select { |
| case waiter.res <- res.Client: |
| // Normal happy case. Something gets its buildlet. |
| return |
| case <-waiter.ctxDone: |
| // Waiter went away in the tiny window between |
| // matchWaiter returning it and here. This |
| // should happen super rarely, so log it to verify that. |
| log.Printf("sched: waiter of type %T went away; trying to match next", res.HostType) |
| } |
| } |
| } |
| |
| // schedule starts creating buildlets if there's demand. |
| // |
| // It acquires s.mu. |
| func (s *Scheduler) schedule() { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| s.scheduleLocked() |
| } |
| |
| // scheduleLocked starts creating buildlets if there's demand. |
| // |
| // It requires that s.mu be held. |
| func (s *Scheduler) scheduleLocked() { |
| for hostType, waiting := range s.waiting { |
| need := len(waiting) - s.hostsCreating[hostType] |
| if need <= 0 { |
| continue |
| } |
| pool := poolForConf(dashboard.Hosts[hostType]) |
| // TODO: recognize certain pools like the reverse pool |
| // that have finite capacity and will just queue up |
| // GetBuildlet calls anyway and avoid extra goroutines |
| // here and just cap the number of outstanding |
| // GetBuildlet calls. But even with thousands of |
| // outstanding builds, that's a small constant memory |
| // savings, so for now just do the simpler thing. |
| for i := 0; i < need; i++ { |
| s.hostsCreating[hostType]++ |
| go s.getPoolBuildlet(pool, hostType) |
| } |
| } |
| } |
| |
| type stderrLogger struct{} |
| |
| func (stderrLogger) LogEventTime(event string, optText ...string) { |
| if len(optText) == 0 { |
| log.Printf("sched.getbuildlet: %v", event) |
| } else { |
| log.Printf("sched.getbuildlet: %v, %v", event, optText[0]) |
| } |
| } |
| |
| func (l stderrLogger) CreateSpan(event string, optText ...string) spanlog.Span { |
| return createSpan(l, event, optText...) |
| } |
| |
| func (s *Scheduler) getPoolBuildlet(pool BuildletPool, hostType string) { |
| res := getBuildletResult{ |
| Pool: pool, |
| HostType: hostType, |
| } |
| ctx := context.Background() // TODO: make these cancelable and cancel unneeded ones earlier? |
| res.Client, res.Err = pool.GetBuildlet(ctx, hostType, stderrLogger{}) |
| s.matchBuildlet(res) |
| } |
| |
| // matchWaiter returns (and removes from the waiting queue) the highest priority SchedItem |
| // that matches the provided host type. |
| func (s *Scheduler) matchWaiter(hostType string) (_ *SchedItem, ok bool) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| var best *SchedItem |
| for si := range s.waiting[hostType] { |
| if best == nil || schedLess(si, best) { |
| best = si |
| } |
| } |
| return best, best != nil |
| } |
| |
| func (s *Scheduler) removeWaiter(si *SchedItem) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| if m := s.waiting[si.HostType]; m != nil { |
| delete(m, si) |
| } |
| } |
| |
| func (s *Scheduler) enqueueWaiter(si *SchedItem) { |
| s.mu.Lock() |
| defer s.mu.Unlock() |
| if _, ok := s.waiting[si.HostType]; !ok { |
| s.waiting[si.HostType] = make(map[*SchedItem]bool) |
| } |
| s.waiting[si.HostType][si] = true |
| s.scheduleLocked() |
| } |
| |
| // schedLess reports whether scheduled item ia is "less" (more |
| // important) than scheduled item ib. |
| func schedLess(ia, ib *SchedItem) bool { |
| // TODO: flesh out this policy more. For now this is much |
| // better than the old random policy. |
| // For example, consider IsHelper? Figure out a policy. |
| |
| // Gomote is most important, then TryBots, then FIFO for |
| // either Gomote/Try, else LIFO for post-submit builds. |
| if ia.IsGomote != ib.IsGomote { |
| return ia.IsGomote |
| } |
| if ia.IsTry != ib.IsTry { |
| return ia.IsTry |
| } |
| // Gomote and TryBots are FIFO. |
| if ia.IsGomote || ia.IsTry { |
| // TODO: if IsTry, consider how many TryBot requests |
| // are outstanding per user. The scheduler should |
| // round-robin between CL authors, rather than use |
| // time. But time works for now. |
| return ia.requestTime.Before(ib.requestTime) |
| } |
| // Post-submit builds are LIFO. |
| return ib.requestTime.Before(ia.requestTime) |
| } |
| |
| // SchedItem is a specification of a requested buildlet in its |
| // exported fields, and internal scheduler state used while waiting |
| // for that buildlet. |
| type SchedItem struct { |
| buildgo.BuilderRev // not set for gomote |
| HostType string |
| IsGomote bool |
| IsTry bool |
| IsHelper bool |
| |
| // We set in GetBuildlet: |
| s *Scheduler |
| requestTime time.Time |
| tryFor string // which user. (user with 1 trybot >> user with 50 trybots) |
| pool BuildletPool |
| ctxDone <-chan struct{} |
| // TODO: track the commit time of the BuilderRev, via call to maintnerd probably |
| // commitTime time.Time |
| |
| // res is the result channel, containing either a |
| // *buildlet.Client or an error. It is read by GetBuildlet and |
| // written by assignBuildlet. |
| res chan interface{} |
| } |
| |
| func (si *SchedItem) cancel() { |
| si.s.removeWaiter(si) |
| } |
| |
| // GetBuildlet requests a buildlet with the parameters described in si. |
| // |
| // The provided si must be newly allocated; ownership passes to the scheduler. |
| func (s *Scheduler) GetBuildlet(ctx context.Context, lg logger, si *SchedItem) (*buildlet.Client, error) { |
| pool := poolForConf(dashboard.Hosts[si.HostType]) |
| |
| if !useScheduler { |
| return pool.GetBuildlet(ctx, si.HostType, lg) |
| } |
| |
| si.pool = pool |
| si.s = s |
| si.requestTime = time.Now() |
| si.res = make(chan interface{}) // NOT buffered |
| si.ctxDone = ctx.Done() |
| |
| // TODO: once we remove the useScheduler const, we can |
| // remove the "lg" logger parameter. We don't need to |
| // log anything during the buildlet creation process anymore |
| // because we don't which build it'll be for. So all we can |
| // say in the logs for is "Asking for a buildlet" and "Got |
| // one", which the caller already does. I think. Verify that. |
| |
| s.enqueueWaiter(si) |
| select { |
| case v := <-si.res: |
| if bc, ok := v.(*buildlet.Client); ok { |
| return bc, nil |
| } |
| return nil, v.(error) |
| case <-ctx.Done(): |
| si.cancel() |
| return nil, ctx.Err() |
| } |
| } |