internal/upload: standardize logging and identify an upload bug

This CL includes a new TODO identifying a new bug that was preventing
uploads from my machine, as well as all the logging changes I made such
that I could identify the failure from debug logs.

A subsequent CL will fix the bug.

Change-Id: I3c1d3f82894888b45f2b94638948ee950a74cfff
Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/584399
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/upload/date.go b/internal/upload/date.go
index 4fc770f..1632b96 100644
--- a/internal/upload/date.go
+++ b/internal/upload/date.go
@@ -38,7 +38,7 @@
 //
 // TODO(rfindley): just return an error to make this explicit.
 func (u *Uploader) counterDateSpan(fname string) (begin, end time.Time) {
-	parsed, err := u.parse(fname)
+	parsed, err := u.parseCountFile(fname)
 	if err != nil {
 		u.logger.Printf("expiry Parse: %v for %s", err, fname)
 		return time.Time{}, farFuture
@@ -68,7 +68,7 @@
 	m  map[string]*counter.File
 }
 
-func (u *Uploader) parse(fname string) (*counter.File, error) {
+func (u *Uploader) parseCountFile(fname string) (*counter.File, error) {
 	u.cache.mu.Lock()
 	defer u.cache.mu.Unlock()
 	if u.cache.m == nil {
diff --git a/internal/upload/findwork.go b/internal/upload/findwork.go
index 22add2a..43586f8 100644
--- a/internal/upload/findwork.go
+++ b/internal/upload/findwork.go
@@ -41,9 +41,10 @@
 		if strings.HasSuffix(fi.Name(), ".v1.count") {
 			fname := filepath.Join(localdir, fi.Name())
 			if u.stillOpen(fname) {
-				u.logger.Printf("Skipping count file %s: still active", fname)
+				u.logger.Printf("Skipping count file %s: still active", fi.Name())
 				continue
 			}
+			u.logger.Printf("Collecting count file %s", fi.Name())
 			ans.countfiles = append(ans.countfiles, fname)
 		} else if strings.HasPrefix(fi.Name(), "local.") {
 			// skip
@@ -63,7 +64,7 @@
 					//
 					// TODO(rfindley): store the begin date in reports, so that we can
 					// verify this assumption.
-					u.logger.Printf("uploadable %s", fi.Name())
+					u.logger.Printf("Uploadable: %s", fi.Name())
 					ans.readyfiles = append(ans.readyfiles, filepath.Join(localdir, fi.Name()))
 				}
 			} else {
@@ -73,7 +74,7 @@
 				// TODO(rfindley): invert this logic following more testing. We
 				// should only upload if we know both the asof date and the report
 				// date, and they are acceptable.
-				u.logger.Printf("uploadable anyway %s", fi.Name())
+				u.logger.Printf("Uploadable (missing date): %s", fi.Name())
 				ans.readyfiles = append(ans.readyfiles, filepath.Join(localdir, fi.Name()))
 			}
 		}
@@ -89,6 +90,7 @@
 	ans.uploaded = make(map[string]bool)
 	for _, fi := range fis {
 		if strings.HasSuffix(fi.Name(), ".json") {
+			u.logger.Printf("Already uploaded: %s", fi.Name())
 			ans.uploaded[fi.Name()] = true
 		}
 	}
diff --git a/internal/upload/reports.go b/internal/upload/reports.go
index 4052bb0..262cc30 100644
--- a/internal/upload/reports.go
+++ b/internal/upload/reports.go
@@ -31,7 +31,7 @@
 	if lastWeek >= today { //should never happen
 		lastWeek = ""
 	}
-	u.logger.Printf("lastWeek %q, today %s", lastWeek, today)
+	u.logger.Printf("Last week: %s, today: %s", lastWeek, today)
 	countFiles := make(map[string][]string) // expiry date string->filenames
 	earliest := make(map[string]time.Time)  // earliest begin time for any counter
 	for _, f := range todo.countfiles {
@@ -47,7 +47,7 @@
 	}
 	for expiry, files := range countFiles {
 		if notNeeded(expiry, *todo) {
-			u.logger.Printf("files for %s not needed, deleting %v", expiry, files)
+			u.logger.Printf("Files for %s not needed, deleting %v", expiry, files)
 			// The report already exists.
 			// There's another check in createReport.
 			u.deleteFiles(files)
@@ -58,6 +58,7 @@
 			return nil, err
 		}
 		if fname != "" {
+			u.logger.Printf("Ready to upload: %s", filepath.Base(fname))
 			todo.readyfiles = append(todo.readyfiles, fname)
 		}
 	}
@@ -109,21 +110,21 @@
 
 // createReport for all the count files for the same date.
 // returns the absolute path name of the file containing the report
-func (u *Uploader) createReport(start time.Time, expiryDate string, files []string, lastWeek string) (string, error) {
+func (u *Uploader) createReport(start time.Time, expiryDate string, countFiles []string, lastWeek string) (string, error) {
 	uploadOK := true
 	mode, asof := u.dir.Mode()
 	if mode != "on" {
-		u.logger.Printf("no upload config or mode %q is not 'on'", mode)
+		u.logger.Printf("No upload config or mode %q is not 'on'", mode)
 		uploadOK = false // no config, nothing to upload
 	}
 	if u.tooOld(expiryDate, u.startTime) {
-		u.logger.Printf("expiryDate %s is too old", expiryDate)
+		u.logger.Printf("Expiry date %s is too old", expiryDate)
 		uploadOK = false
 	}
 	// If the mode is recorded with an asof date, don't upload if the report
 	// includes any data on or before the asof date.
 	if !asof.IsZero() && !asof.Before(start) {
-		u.logger.Printf("asof %s is not before start %s", asof, start)
+		u.logger.Printf("As-of date %s is not before start %s", asof, start)
 		uploadOK = false
 	}
 	// should we check that all the x.Meta are consistent for GOOS, GOARCH, etc?
@@ -134,14 +135,14 @@
 		LastWeek: lastWeek,
 	}
 	if report.X > u.config.SampleRate && u.config.SampleRate > 0 {
-		u.logger.Printf("X:%f > SampleRate:%f, not uploadable", report.X, u.config.SampleRate)
+		u.logger.Printf("X: %f > SampleRate: %f, not uploadable", report.X, u.config.SampleRate)
 		uploadOK = false
 	}
 	var succeeded bool
-	for _, f := range files {
-		x, err := u.parse(string(f))
+	for _, f := range countFiles {
+		x, err := u.parseCountFile(f)
 		if err != nil {
-			u.logger.Printf("unparseable (%v) %s", err, f)
+			u.logger.Printf("Unparseable count file %s: %v", filepath.Base(f), err)
 			continue
 		}
 		prog := findProgReport(x.Meta, report)
@@ -156,19 +157,24 @@
 			succeeded = true
 		}
 	}
+	// TODO(rfindley): There's a bug here: we return an error if a count file
+	// parses successfully but contains no counters.
+	//
+	// Furthermore, this error causes us to bail out and return no reports at
+	// all. We should instead fail only the report containing the bad count file.
 	if !succeeded {
-		return "", fmt.Errorf("all %d count files were unparseable", len(files))
+		return "", fmt.Errorf("all %d count files for %s were unparseable", len(countFiles), expiryDate)
 	}
 	// 1. generate the local report
 	localContents, err := json.MarshalIndent(report, "", " ")
 	if err != nil {
-		return "", fmt.Errorf("failed to marshal report (%v)", err)
+		return "", fmt.Errorf("failed to marshal report for %s: %v", expiryDate, err)
 	}
 	// check that the report can be read back
 	// TODO(pjw): remove for production?
 	var report2 telemetry.Report
 	if err := json.Unmarshal(localContents, &report2); err != nil {
-		return "", fmt.Errorf("failed to unmarshal local report (%v)", err)
+		return "", fmt.Errorf("failed to unmarshal local report for %s: %v", expiryDate, err)
 	}
 
 	var uploadContents []byte
@@ -215,7 +221,7 @@
 
 		uploadContents, err = json.MarshalIndent(upload, "", " ")
 		if err != nil {
-			return "", fmt.Errorf("failed to marshal upload report (%v)", err)
+			return "", fmt.Errorf("failed to marshal upload report for %s: %v", expiryDate, err)
 		}
 	}
 	localFileName := filepath.Join(u.dir.LocalDir(), "local."+expiryDate+".json")
@@ -225,11 +231,11 @@
 	// if either file exists, someone has been here ahead of us
 	// (there is still a race, but this check shortens the open window)
 	if _, err := os.Stat(localFileName); err == nil {
-		u.deleteFiles(files)
+		u.deleteFiles(countFiles)
 		return "", fmt.Errorf("local report %s already exists", localFileName)
 	}
 	if _, err := os.Stat(uploadFileName); err == nil {
-		u.deleteFiles(files)
+		u.deleteFiles(countFiles)
 		return "", fmt.Errorf("report %s already exists", uploadFileName)
 	}
 	// write the uploadable file
@@ -249,8 +255,8 @@
 	if errUpload != nil {
 		return "", fmt.Errorf("failed to write upload file %s (%v)", uploadFileName, errUpload)
 	}
-	u.logger.Printf("created %q, deleting %v", uploadFileName, files)
-	u.deleteFiles(files)
+	u.logger.Printf("Created %s, deleting %d count files", filepath.Base(uploadFileName), len(countFiles))
+	u.deleteFiles(countFiles)
 	if uploadOK {
 		return uploadFileName, nil
 	}
diff --git a/internal/upload/run.go b/internal/upload/run.go
index de63324..2fb9fa6 100644
--- a/internal/upload/run.go
+++ b/internal/upload/run.go
@@ -94,7 +94,7 @@
 	default:
 		logWriter = io.MultiWriter(logWriters...)
 	}
-	logger := log.New(logWriter, "", 0)
+	logger := log.New(logWriter, "", log.Ltime|log.Lmicroseconds|log.Lshortfile)
 
 	// Fetch the upload config, if it is not provided.
 	config, configVersion, err := configstore.Download("latest", rcfg.Env)
@@ -136,8 +136,10 @@
 	todo := u.findWork()
 	ready, err := u.reports(&todo)
 	if err != nil {
+		u.logger.Printf("Error building reports: %v", err)
 		return fmt.Errorf("reports failed: %v", err)
 	}
+	u.logger.Printf("Uploading %d reports", len(ready))
 	for _, f := range ready {
 		u.uploadReport(f)
 	}
diff --git a/internal/upload/upload.go b/internal/upload/upload.go
index 5f3f639..bec2230 100644
--- a/internal/upload/upload.go
+++ b/internal/upload/upload.go
@@ -44,9 +44,9 @@
 	today := thisInstant.Format("2006-01-02")
 	match := dateRE.FindStringSubmatch(fname)
 	if match == nil || len(match) < 2 {
-		u.logger.Printf("report name seemed to have no date %q", filepath.Base(fname))
+		u.logger.Printf("Report name %q missing date", filepath.Base(fname))
 	} else if match[1] > today {
-		u.logger.Printf("report %q is later than today %s", filepath.Base(fname), today)
+		u.logger.Printf("Report date for %q is later than today (%s)", filepath.Base(fname), today)
 		return // report is in the future, which shouldn't happen
 	}
 	buf, err := os.ReadFile(fname)
@@ -64,31 +64,31 @@
 	b := bytes.NewReader(buf)
 	fdate := strings.TrimSuffix(filepath.Base(fname), ".json")
 	fdate = fdate[len(fdate)-len("2006-01-02"):]
-	server := u.uploadServerURL + "/" + fdate
+	endpoint := u.uploadServerURL + "/" + fdate
 
-	resp, err := http.Post(server, "application/json", b)
+	resp, err := http.Post(endpoint, "application/json", b)
 	if err != nil {
-		u.logger.Printf("error on Post: %v %q for %q", err, server, fname)
+		u.logger.Printf("Error uploading %s to %s: %v", filepath.Base(fname), endpoint, err)
 		return false
 	}
 	// hope for a 200, remove file on a 4xx, otherwise it will be retried by another process
 	if resp.StatusCode != 200 {
-		u.logger.Printf("resp error on upload %q: %v for %q %q [%+v]", server, resp.Status, fname, fdate, resp)
+		u.logger.Printf("Failed to upload %s to %s: %s", filepath.Base(fname), endpoint, resp.Status)
 		if resp.StatusCode >= 400 && resp.StatusCode < 500 {
 			err := os.Remove(fname)
 			if err == nil {
-				u.logger.Printf("removed")
+				u.logger.Printf("Removed local/%s", filepath.Base(fname))
 			} else {
-				u.logger.Printf("error removing: %v", err)
+				u.logger.Printf("Error removing local/%s: %v", filepath.Base(fname), err)
 			}
 		}
 		return false
 	}
-	// put a copy in the uploaded directory
+	// Store a copy of the uploaded report in the uploaded directory.
 	newname := filepath.Join(u.dir.UploadDir(), fdate+".json")
 	if err := os.WriteFile(newname, buf, 0644); err == nil {
 		os.Remove(fname) // if it exists
 	}
-	u.logger.Printf("uploaded %s to %q", fdate+".json", server)
+	u.logger.Printf("Uploaded %s to %q", fdate+".json", endpoint)
 	return true
 }