internal/worker: add more info to home page
To understand worker behavior better, add more information to
the worker home page.
- The name of the pod (hostname), so we can know which pod we're
looking at.
- More memory statistics, to understand why workers restart.
- Information about all the fetches in progress, and finished within
the last minute.
Change-Id: I38098069f5e03166971e67ad53a64c539ef8b5cf
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/258017
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/content/static/html/worker/index.tmpl b/content/static/html/worker/index.tmpl
index f9e1c71..f6bc914 100644
--- a/content/static/html/worker/index.tmpl
+++ b/content/static/html/worker/index.tmpl
@@ -74,6 +74,8 @@
<tr><td>DB Host</td><td>{{.Config.DBHost}}</td></tr>
<tr><td>Redis Cache Host</td><td>{{.Config.RedisCacheHost}}</td></tr>
<tr><td>Redis HA Host</td><td>{{.Config.RedisHAHost}}</td></tr>
+ <tr><td>Hostname (Pod)</td><td>{{.Hostname}}</td></tr>
+ <tr><td>Age</td><td>{{timeSince .StartTime}}</td></tr>
</table>
</div>
@@ -151,12 +153,60 @@
</div>
<div>
- <h3>Memory</h3>
+ <h3>Memory (all values in Mi)</h3>
<table>
- <tr><td>Go Heap</td><td>{{.GoMemStats.HeapAlloc | bytesToMi}} Mi</td></tr>
- <tr><td>Container Limit</td><td>{{index .CgroupStats "limit" | bytesToMi}} Mi</td></tr>
- <tr><td>Container Used</td><td>{{index .CgroupStats "usage" | bytesToMi}} Mi</td></tr>
- <tr><td>Container Working Set</td><td>{{index .CgroupStats "workingSet" | bytesToMi}} Mi</td></tr>
+ <tr>
+ <td>Go Heap</td>
+ <td>
+ {{.GoMemStats.HeapAlloc | bytesToMi}} Alloc /
+ {{.GoMemStats.HeapInuse | bytesToMi}} In Use /
+ {{.GoMemStats.HeapIdle | bytesToMi}} Idle /
+ {{.GoMemStats.HeapReleased | bytesToMi}} Released
+ </td>
+ </tr>
+ <tr>
+ <td>Container</td>
+ <td>
+ {{index .CgroupStats "limit" | bytesToMi}} Limit /
+ {{index .CgroupStats "trueRSS" | bytesToMi}} RSS /
+ {{index .CgroupStats "usage" | bytesToMi}} Used /
+ {{index .CgroupStats "workingSet" | bytesToMi}} Working Set
+ </td>
+ </tr>
+ </table>
+ </div>
+
+ <div>
+ <h3>Fetches</h3>
+ <table>
+ <thead>
+ <tr>
+ <th>Path</th>
+ <th>Version</th>
+ <th>Zip Size (Mi)</th>
+ <th>Age</th>
+ <th>Status</th>
+ <th>Error</th>
+ </tr>
+ </thead>
+ <tbody>
+ {{range .Fetches}}
+ <tr>
+ <td>{{.ModulePath}}</td>
+ <td>{{.Version}}</td>
+ <td>{{.ZipSize | bytesToMi}}</td>
+ <td>
+ {{if eq .Status 0}}
+ {{timeSince .Start}}
+ {{else}}
+ {{timeSub .Finish .Start}}
+ {{end}}
+ </td>
+ <td>{{if ne .Status 0}}{{.Status}}{{end}}</td>
+ <td>{{if ge .Status 500}}{{.Error}}{{end}}</td>
+ </tr>
+ {{end}}
+ </tbody>
</table>
</div>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index 3b9c011..67cfa16 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -12,7 +12,9 @@
"fmt"
"net/http"
"path"
+ "sort"
"strconv"
+ "sync"
"time"
"go.opencensus.io/plugin/ochttp"
@@ -114,6 +116,7 @@
RequestedVersion: requestedVersion,
Defer: func() {},
}
+ var fi *FetchInfo
defer func() {
if fr.Error != nil {
derrors.Wrap(&fr.Error, "FetchModule(%q, %q)", modulePath, requestedVersion)
@@ -127,6 +130,9 @@
if fr.Status < 300 {
stats.Record(ctx, fetchedPackages.M(int64(len(fr.PackageVersionStates))))
}
+ if fi != nil {
+ finishFetchInfo(fi, fr.Status, fr.Error)
+ }
log.Debugf(ctx, "memory after fetch of %s@%s: %dM", modulePath, requestedVersion, allocMeg())
}()
@@ -180,6 +186,14 @@
}
// Proceed with the fetch.
+ fi = &FetchInfo{
+ ModulePath: modulePath,
+ Version: fr.ResolvedVersion,
+ ZipSize: uint64(zipSize),
+ Start: time.Now(),
+ }
+ startFetchInfo(fi)
+
if modulePath == stdlib.ModulePath {
zipReader, commitTime, err = stdlib.Zip(requestedVersion)
if err != nil {
@@ -317,3 +331,67 @@
}
return false
}
+
+type FetchInfo struct {
+ ModulePath string
+ Version string
+ ZipSize uint64
+ Start time.Time
+ Finish time.Time
+ Status int
+ Error error
+}
+
+var (
+ fetchInfoMu sync.Mutex
+ fetchInfoMap = map[*FetchInfo]struct{}{}
+)
+
+func init() {
+ const linger = time.Minute
+ go func() {
+ for {
+ now := time.Now()
+ fetchInfoMu.Lock()
+ for fi := range fetchInfoMap {
+ if !fi.Finish.IsZero() && now.Sub(fi.Finish) > linger {
+ delete(fetchInfoMap, fi)
+ }
+ }
+ fetchInfoMu.Unlock()
+ time.Sleep(linger)
+ }
+ }()
+}
+
+func startFetchInfo(fi *FetchInfo) {
+ fetchInfoMu.Lock()
+ defer fetchInfoMu.Unlock()
+ fetchInfoMap[fi] = struct{}{}
+}
+
+func finishFetchInfo(fi *FetchInfo, status int, err error) {
+ fetchInfoMu.Lock()
+ defer fetchInfoMu.Unlock()
+ fi.Finish = time.Now()
+ fi.Status = status
+ fi.Error = err
+}
+
+// FetchInfos returns copies of the information about all tracked fetches,
+// both in progress and recently finished, sorted by start time.
+func FetchInfos() []*FetchInfo {
+ var fis []*FetchInfo
+ fetchInfoMu.Lock()
+ for fi := range fetchInfoMap {
+ // Copy to avoid races on Status and Error when read by
+ // worker home page.
+ cfi := *fi
+ fis = append(fis, &cfi)
+ }
+ fetchInfoMu.Unlock()
+ sort.Slice(fis, func(i, j int) bool {
+ return fis[i].Start.Before(fis[j].Start)
+ })
+ return fis
+}
diff --git a/internal/fetch/sourcefiles.go b/internal/fetch/sourcefiles.go
index c4dec5a..b0e24fb 100644
--- a/internal/fetch/sourcefiles.go
+++ b/internal/fetch/sourcefiles.go
@@ -41,6 +41,7 @@
&ast.FuncLit{},
&ast.FuncType{},
&ast.GenDecl{},
+ &ast.GoStmt{},
&ast.KeyValueExpr{},
&ast.IfStmt{},
&ast.ImportSpec{},
diff --git a/internal/worker/memory.go b/internal/worker/memory.go
index a71fae8..88fc382 100644
--- a/internal/worker/memory.go
+++ b/internal/worker/memory.go
@@ -132,6 +132,8 @@
workingSet -= tif
}
m["workingSet"] = workingSet
+ // True RSS. See note on https://lwn.net/Articles/432224.
+ m["trueRSS"] = m["rss"] + m["mapped_file"]
return m
}
diff --git a/internal/worker/pages.go b/internal/worker/pages.go
index ef51651..eb863b7 100644
--- a/internal/worker/pages.go
+++ b/internal/worker/pages.go
@@ -31,6 +31,8 @@
msg string
}
+var startTime = time.Now()
+
// doIndexPage writes the status page. On error it returns the error and a short
// string to be written back to the client.
func (s *Server) doIndexPage(w http.ResponseWriter, r *http.Request) (err error) {
@@ -76,6 +78,8 @@
ResourcePrefix string
LatestTimestamp *time.Time
LocationID string
+ Hostname string
+ StartTime time.Time
Experiments []*internal.Experiment
ExperimentsFromConfig bool
Excluded []string
@@ -84,11 +88,14 @@
ProcessStats processMemStats
SystemStats systemMemStats
CgroupStats map[string]uint64
+ Fetches []*fetch.FetchInfo
}{
Config: s.cfg,
Env: env(s.cfg),
ResourcePrefix: strings.ToLower(env(s.cfg)) + "-",
LocationID: s.cfg.LocationID,
+ Hostname: os.Getenv("HOSTNAME"),
+ StartTime: startTime,
Experiments: experiments,
ExperimentsFromConfig: os.Getenv("GO_DISCOVERY_EXPERIMENTS_FROM_CONFIG") == "true",
Excluded: excluded,
@@ -97,6 +104,7 @@
ProcessStats: pms,
SystemStats: sms,
CgroupStats: getCgroupMemStats(),
+ Fetches: fetch.FetchInfos(),
}
return renderPage(ctx, w, page, s.templates[indexTemplate])
}
diff --git a/internal/worker/server.go b/internal/worker/server.go
index cdf00cc..a57a387 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -529,6 +529,12 @@
"timefmt": formatTime,
"bytesToMi": bytesToMi,
"pct": percentage,
+ "timeSince": func(t time.Time) time.Duration {
+ return time.Since(t).Round(time.Second)
+ },
+ "timeSub": func(t1, t2 time.Time) time.Duration {
+ return t1.Sub(t2).Round(time.Second)
+ },
}).ParseFilesFromTrustedSources(templatePath)
}