internal/upload: standardize logging and identify an upload bug
This CL adds a TODO identifying a bug that was preventing
uploads from my machine, along with the logging changes that allowed me
to identify the failure from debug logs.
A subsequent CL will fix the bug.
Change-Id: I3c1d3f82894888b45f2b94638948ee950a74cfff
Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/584399
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/upload/date.go b/internal/upload/date.go
index 4fc770f..1632b96 100644
--- a/internal/upload/date.go
+++ b/internal/upload/date.go
@@ -38,7 +38,7 @@
//
// TODO(rfindley): just return an error to make this explicit.
func (u *Uploader) counterDateSpan(fname string) (begin, end time.Time) {
- parsed, err := u.parse(fname)
+ parsed, err := u.parseCountFile(fname)
if err != nil {
u.logger.Printf("expiry Parse: %v for %s", err, fname)
return time.Time{}, farFuture
@@ -68,7 +68,7 @@
m map[string]*counter.File
}
-func (u *Uploader) parse(fname string) (*counter.File, error) {
+func (u *Uploader) parseCountFile(fname string) (*counter.File, error) {
u.cache.mu.Lock()
defer u.cache.mu.Unlock()
if u.cache.m == nil {
diff --git a/internal/upload/findwork.go b/internal/upload/findwork.go
index 22add2a..43586f8 100644
--- a/internal/upload/findwork.go
+++ b/internal/upload/findwork.go
@@ -41,9 +41,10 @@
if strings.HasSuffix(fi.Name(), ".v1.count") {
fname := filepath.Join(localdir, fi.Name())
if u.stillOpen(fname) {
- u.logger.Printf("Skipping count file %s: still active", fname)
+ u.logger.Printf("Skipping count file %s: still active", fi.Name())
continue
}
+ u.logger.Printf("Collecting count file %s", fi.Name())
ans.countfiles = append(ans.countfiles, fname)
} else if strings.HasPrefix(fi.Name(), "local.") {
// skip
@@ -63,7 +64,7 @@
//
// TODO(rfindley): store the begin date in reports, so that we can
// verify this assumption.
- u.logger.Printf("uploadable %s", fi.Name())
+ u.logger.Printf("Uploadable: %s", fi.Name())
ans.readyfiles = append(ans.readyfiles, filepath.Join(localdir, fi.Name()))
}
} else {
@@ -73,7 +74,7 @@
// TODO(rfindley): invert this logic following more testing. We
// should only upload if we know both the asof date and the report
// date, and they are acceptable.
- u.logger.Printf("uploadable anyway %s", fi.Name())
+ u.logger.Printf("Uploadable (missing date): %s", fi.Name())
ans.readyfiles = append(ans.readyfiles, filepath.Join(localdir, fi.Name()))
}
}
@@ -89,6 +90,7 @@
ans.uploaded = make(map[string]bool)
for _, fi := range fis {
if strings.HasSuffix(fi.Name(), ".json") {
+ u.logger.Printf("Already uploaded: %s", fi.Name())
ans.uploaded[fi.Name()] = true
}
}
diff --git a/internal/upload/reports.go b/internal/upload/reports.go
index 4052bb0..262cc30 100644
--- a/internal/upload/reports.go
+++ b/internal/upload/reports.go
@@ -31,7 +31,7 @@
if lastWeek >= today { //should never happen
lastWeek = ""
}
- u.logger.Printf("lastWeek %q, today %s", lastWeek, today)
+ u.logger.Printf("Last week: %s, today: %s", lastWeek, today)
countFiles := make(map[string][]string) // expiry date string->filenames
earliest := make(map[string]time.Time) // earliest begin time for any counter
for _, f := range todo.countfiles {
@@ -47,7 +47,7 @@
}
for expiry, files := range countFiles {
if notNeeded(expiry, *todo) {
- u.logger.Printf("files for %s not needed, deleting %v", expiry, files)
+ u.logger.Printf("Files for %s not needed, deleting %v", expiry, files)
// The report already exists.
// There's another check in createReport.
u.deleteFiles(files)
@@ -58,6 +58,7 @@
return nil, err
}
if fname != "" {
+ u.logger.Printf("Ready to upload: %s", filepath.Base(fname))
todo.readyfiles = append(todo.readyfiles, fname)
}
}
@@ -109,21 +110,21 @@
// createReport for all the count files for the same date.
// returns the absolute path name of the file containing the report
-func (u *Uploader) createReport(start time.Time, expiryDate string, files []string, lastWeek string) (string, error) {
+func (u *Uploader) createReport(start time.Time, expiryDate string, countFiles []string, lastWeek string) (string, error) {
uploadOK := true
mode, asof := u.dir.Mode()
if mode != "on" {
- u.logger.Printf("no upload config or mode %q is not 'on'", mode)
+ u.logger.Printf("No upload config or mode %q is not 'on'", mode)
uploadOK = false // no config, nothing to upload
}
if u.tooOld(expiryDate, u.startTime) {
- u.logger.Printf("expiryDate %s is too old", expiryDate)
+ u.logger.Printf("Expiry date %s is too old", expiryDate)
uploadOK = false
}
// If the mode is recorded with an asof date, don't upload if the report
// includes any data on or before the asof date.
if !asof.IsZero() && !asof.Before(start) {
- u.logger.Printf("asof %s is not before start %s", asof, start)
+ u.logger.Printf("As-of date %s is not before start %s", asof, start)
uploadOK = false
}
// should we check that all the x.Meta are consistent for GOOS, GOARCH, etc?
@@ -134,14 +135,14 @@
LastWeek: lastWeek,
}
if report.X > u.config.SampleRate && u.config.SampleRate > 0 {
- u.logger.Printf("X:%f > SampleRate:%f, not uploadable", report.X, u.config.SampleRate)
+ u.logger.Printf("X: %f > SampleRate: %f, not uploadable", report.X, u.config.SampleRate)
uploadOK = false
}
var succeeded bool
- for _, f := range files {
- x, err := u.parse(string(f))
+ for _, f := range countFiles {
+ x, err := u.parseCountFile(f)
if err != nil {
- u.logger.Printf("unparseable (%v) %s", err, f)
+ u.logger.Printf("Unparseable count file %s: %v", filepath.Base(f), err)
continue
}
prog := findProgReport(x.Meta, report)
@@ -156,19 +157,24 @@
succeeded = true
}
}
+ // TODO(rfindley): There's a bug here: we return an error if a count file
+ // parses successfully but contains no counters.
+ //
+ // Furthermore, this error causes us to bail out and return no reports; we
+ // should fail only the report containing the bad count file.
if !succeeded {
- return "", fmt.Errorf("all %d count files were unparseable", len(files))
+ return "", fmt.Errorf("all %d count files for %s were unparseable", len(countFiles), expiryDate)
}
// 1. generate the local report
localContents, err := json.MarshalIndent(report, "", " ")
if err != nil {
- return "", fmt.Errorf("failed to marshal report (%v)", err)
+ return "", fmt.Errorf("failed to marshal report for %s: %v", expiryDate, err)
}
// check that the report can be read back
// TODO(pjw): remove for production?
var report2 telemetry.Report
if err := json.Unmarshal(localContents, &report2); err != nil {
- return "", fmt.Errorf("failed to unmarshal local report (%v)", err)
+ return "", fmt.Errorf("failed to unmarshal local report for %s: %v", expiryDate, err)
}
var uploadContents []byte
@@ -215,7 +221,7 @@
uploadContents, err = json.MarshalIndent(upload, "", " ")
if err != nil {
- return "", fmt.Errorf("failed to marshal upload report (%v)", err)
+ return "", fmt.Errorf("failed to marshal upload report for %s: %v", expiryDate, err)
}
}
localFileName := filepath.Join(u.dir.LocalDir(), "local."+expiryDate+".json")
@@ -225,11 +231,11 @@
// if either file exists, someone has been here ahead of us
// (there is still a race, but this check shortens the open window)
if _, err := os.Stat(localFileName); err == nil {
- u.deleteFiles(files)
+ u.deleteFiles(countFiles)
return "", fmt.Errorf("local report %s already exists", localFileName)
}
if _, err := os.Stat(uploadFileName); err == nil {
- u.deleteFiles(files)
+ u.deleteFiles(countFiles)
return "", fmt.Errorf("report %s already exists", uploadFileName)
}
// write the uploadable file
@@ -249,8 +255,8 @@
if errUpload != nil {
return "", fmt.Errorf("failed to write upload file %s (%v)", uploadFileName, errUpload)
}
- u.logger.Printf("created %q, deleting %v", uploadFileName, files)
- u.deleteFiles(files)
+ u.logger.Printf("Created %s, deleting %d count files", filepath.Base(uploadFileName), len(countFiles))
+ u.deleteFiles(countFiles)
if uploadOK {
return uploadFileName, nil
}
diff --git a/internal/upload/run.go b/internal/upload/run.go
index de63324..2fb9fa6 100644
--- a/internal/upload/run.go
+++ b/internal/upload/run.go
@@ -94,7 +94,7 @@
default:
logWriter = io.MultiWriter(logWriters...)
}
- logger := log.New(logWriter, "", 0)
+ logger := log.New(logWriter, "", log.Ltime|log.Lmicroseconds|log.Lshortfile)
// Fetch the upload config, if it is not provided.
config, configVersion, err := configstore.Download("latest", rcfg.Env)
@@ -136,8 +136,10 @@
todo := u.findWork()
ready, err := u.reports(&todo)
if err != nil {
+ u.logger.Printf("Error building reports: %v", err)
return fmt.Errorf("reports failed: %v", err)
}
+ u.logger.Printf("Uploading %d reports", len(ready))
for _, f := range ready {
u.uploadReport(f)
}
diff --git a/internal/upload/upload.go b/internal/upload/upload.go
index 5f3f639..bec2230 100644
--- a/internal/upload/upload.go
+++ b/internal/upload/upload.go
@@ -44,9 +44,9 @@
today := thisInstant.Format("2006-01-02")
match := dateRE.FindStringSubmatch(fname)
if match == nil || len(match) < 2 {
- u.logger.Printf("report name seemed to have no date %q", filepath.Base(fname))
+ u.logger.Printf("Report name %q missing date", filepath.Base(fname))
} else if match[1] > today {
- u.logger.Printf("report %q is later than today %s", filepath.Base(fname), today)
+ u.logger.Printf("Report date for %q is later than today (%s)", filepath.Base(fname), today)
return // report is in the future, which shouldn't happen
}
buf, err := os.ReadFile(fname)
@@ -64,31 +64,31 @@
b := bytes.NewReader(buf)
fdate := strings.TrimSuffix(filepath.Base(fname), ".json")
fdate = fdate[len(fdate)-len("2006-01-02"):]
- server := u.uploadServerURL + "/" + fdate
+ endpoint := u.uploadServerURL + "/" + fdate
- resp, err := http.Post(server, "application/json", b)
+ resp, err := http.Post(endpoint, "application/json", b)
if err != nil {
- u.logger.Printf("error on Post: %v %q for %q", err, server, fname)
+ u.logger.Printf("Error uploading %s to %s: %v", filepath.Base(fname), endpoint, err)
return false
}
// hope for a 200, remove file on a 4xx, otherwise it will be retried by another process
if resp.StatusCode != 200 {
- u.logger.Printf("resp error on upload %q: %v for %q %q [%+v]", server, resp.Status, fname, fdate, resp)
+ u.logger.Printf("Failed to upload %s to %s: %s", filepath.Base(fname), endpoint, resp.Status)
if resp.StatusCode >= 400 && resp.StatusCode < 500 {
err := os.Remove(fname)
if err == nil {
- u.logger.Printf("removed")
+ u.logger.Printf("Removed local/%s", filepath.Base(fname))
} else {
- u.logger.Printf("error removing: %v", err)
+ u.logger.Printf("Error removing local/%s: %v", filepath.Base(fname), err)
}
}
return false
}
- // put a copy in the uploaded directory
+ // Store a copy of the uploaded report in the uploaded directory.
newname := filepath.Join(u.dir.UploadDir(), fdate+".json")
if err := os.WriteFile(newname, buf, 0644); err == nil {
os.Remove(fname) // if it exists
}
- u.logger.Printf("uploaded %s to %q", fdate+".json", server)
+ u.logger.Printf("Uploaded %s to %q", fdate+".json", endpoint)
return true
}