internal/analysis,worker: add source to diagnostics

Add source code lines to analysis diagnostics.

Change-Id: Ic69302fa8b62488aea8b820128df00fb46c7769e
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/484748
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/internal/analysis/analysis.go b/internal/analysis/analysis.go
index b72aae7..576e935 100644
--- a/internal/analysis/analysis.go
+++ b/internal/analysis/analysis.go
@@ -184,6 +184,7 @@
 	Category string `bigquery:"category"`
 	Position string `bigquery:"position"`
 	Message  string `bigquery:"message"`
+	Source   string `bigquery:"source"`
 }
 
 // SchemaVersion changes whenever the analysis schema changes.
diff --git a/internal/worker/analysis.go b/internal/worker/analysis.go
index aedc0f3..369ea4d 100644
--- a/internal/worker/analysis.go
+++ b/internal/worker/analysis.go
@@ -5,6 +5,7 @@
 package worker
 
 import (
+	"bufio"
 	"context"
 	"crypto/sha256"
 	"encoding/hex"
@@ -16,6 +17,7 @@
 	"os"
 	"os/exec"
 	"path"
+	"strconv"
 	"strings"
 
 	"cloud.google.com/go/storage"
@@ -111,8 +113,13 @@
 		BinaryName:  req.Binary,
 		WorkVersion: wv,
 	}
-	err := doScan(ctx, req.Module, req.Version, req.Insecure, func() error {
-		jsonTree, err := s.scanInternal(ctx, req, localBinaryPath)
+	err := doScan(ctx, req.Module, req.Version, req.Insecure, func() (err error) {
+		// Create a module directory. scanInternal will write the module contents there,
+		// and both the analysis binary and addSource will read them.
+		mdir := moduleDir(req.Module, req.Version)
+		defer derrors.Cleanup(&err, func() error { return os.RemoveAll(mdir) })
+
+		jsonTree, err := s.scanInternal(ctx, req, localBinaryPath, mdir)
 		if err != nil {
 			return err
 		}
@@ -123,7 +130,7 @@
 		row.Version = info.Version
 		row.CommitTime = info.Time
 		row.Diagnostics = analysis.JSONTreeToDiagnostics(jsonTree)
-		return nil
+		return addSource(row.Diagnostics, 1)
 	})
 	if err != nil {
 		switch {
@@ -150,10 +157,8 @@
 	return row
 }
 
-func (s *analysisServer) scanInternal(ctx context.Context, req *analysis.ScanRequest, binaryPath string) (jt analysis.JSONTree, err error) {
-	mdir := moduleDir(req.Module, req.Version)
-	defer derrors.Cleanup(&err, func() error { return os.RemoveAll(mdir) })
-	if err := prepareModule(ctx, req.Module, req.Version, mdir, s.proxyClient, req.Insecure); err != nil {
+func (s *analysisServer) scanInternal(ctx context.Context, req *analysis.ScanRequest, binaryPath, moduleDir string) (jt analysis.JSONTree, err error) {
+	if err := prepareModule(ctx, req.Module, req.Version, moduleDir, s.proxyClient, req.Insecure); err != nil {
 		return nil, err
 	}
 	var sbox *sandbox.Sandbox
@@ -161,11 +166,7 @@
 		sbox = sandbox.New("/bundle")
 		sbox.Runsc = "/usr/local/bin/runsc"
 	}
-	tree, err := runAnalysisBinary(sbox, binaryPath, req.Args, mdir)
-	if err != nil {
-		return nil, err
-	}
-	return tree, nil
+	return runAnalysisBinary(sbox, binaryPath, req.Args, moduleDir)
 }
 
 func hashFile(filename string) (_ []byte, err error) {
@@ -209,6 +210,77 @@
 	return cmd.Output()
 }
 
+// addSource adds source code lines to the diagnostics.
+// Each diagnostic's position includes a full file path and line number.
+// addSource reads the file at the line, and includes nContext lines from above
+// and below.
+func addSource(ds []*analysis.Diagnostic, nContext int) error {
+	for _, d := range ds {
+		file, line, _, err := parsePosition(d.Position)
+		if err != nil {
+			return err
+		}
+		source, err := readSource(file, line, nContext)
+		if err != nil {
+			return fmt.Errorf("reading %s:%d: %w", file, line, err)
+		}
+		d.Source = source
+	}
+	return nil
+}
+
+// parsePosition parses a position from a diagnostic.
+// Positions are in the format file:line:col.
+func parsePosition(pos string) (file string, line, col int, err error) {
+	defer derrors.Wrap(&err, "parsePosition(%q)", pos)
+	i := strings.LastIndexByte(pos, ':')
+	if i < 0 {
+		return "", 0, 0, errors.New("missing colon")
+	}
+	col, err = strconv.Atoi(pos[i+1:])
+	if err != nil {
+		return "", 0, 0, err
+	}
+	pos = pos[:i]
+	i = strings.LastIndexByte(pos, ':')
+	if i < 0 {
+		return "", 0, 0, errors.New("missing second colon")
+	}
+	line, err = strconv.Atoi(pos[i+1:])
+	if err != nil {
+		return "", 0, 0, err
+	}
+	return pos[:i], line, col, nil
+}
+
+// readSource returns the given line (1-based) from the file, along with
+// nContext lines above and below it.
+func readSource(file string, line int, nContext int) (_ string, err error) {
+	defer derrors.Wrap(&err, "readSource(%q, %d, %d)", file, line, nContext)
+	f, err := os.Open(file)
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+	scan := bufio.NewScanner(f)
+	var lines []string
+	n := 0 // 1-based line number
+	for scan.Scan() {
+		n++
+		if n < line-nContext {
+			continue
+		}
+		if n > line+nContext {
+			break
+		}
+		lines = append(lines, scan.Text())
+	}
+	if scan.Err() != nil {
+		return "", scan.Err()
+	}
+	return strings.Join(lines, "\n"), nil
+}
+
 func (s *analysisServer) handleEnqueue(w http.ResponseWriter, r *http.Request) (err error) {
 	defer derrors.Wrap(&err, "analysisServer.handleEnqueue")
 	ctx := r.Context()
diff --git a/internal/worker/analysis_test.go b/internal/worker/analysis_test.go
index 4ddb60d..fde7d16 100644
--- a/internal/worker/analysis_test.go
+++ b/internal/worker/analysis_test.go
@@ -6,6 +6,8 @@
 
 import (
 	"context"
+	"fmt"
+	"os"
 	"path/filepath"
 	"strings"
 	"testing"
@@ -152,6 +154,7 @@
 				PackageID:    "a.com/m",
 				AnalyzerName: "findcall",
 				Message:      "call of G(...)",
+				Source:       "package p\nfunc F()  { G() }\nfunc G() {}",
 			},
 		},
 	}
@@ -175,3 +178,58 @@
 	}
 	diff(want, got)
 }
+
+func TestParsePosition(t *testing.T) {
+	for _, test := range []struct {
+		pos      string
+		wantFile string
+		wantLine int
+		wantCol  int
+		wantErr  bool
+	}{
+		{"", "", 0, 0, true},
+		{"x", "", 0, 0, true},
+		{"x/y:b:1", "", 0, 0, true},
+		{"x/y:17:2", "x/y", 17, 2, false},
+		{"x:y:z:973:3", "x:y:z", 973, 3, false},
+	} {
+		gotFile, gotLine, gotCol, err := parsePosition(test.pos)
+		gotErr := err != nil
+		if gotFile != test.wantFile || gotLine != test.wantLine || gotCol != test.wantCol || gotErr != test.wantErr {
+			t.Errorf("got (%q, %d, %d, %t), want (%q, %d, %d, %t)",
+				gotFile, gotLine, gotCol, gotErr,
+				test.wantFile, test.wantLine, test.wantCol, test.wantErr)
+		}
+	}
+}
+
+func TestReadSource(t *testing.T) {
+	// Create a file with five lines containing the numbers 1 through 5.
+	file := filepath.Join(t.TempDir(), "f")
+	if err := os.WriteFile(file, []byte("1\n2\n3\n4\n5\n"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	for _, test := range []struct {
+		line     int
+		nContext int
+		want     string
+	}{
+		// line number out of range -> empty string
+		{-1, 0, ""},
+		{6, 0, ""},
+		{1, 0, "1"},
+		{1, 1, "1\n2"},
+		{2, 1, "1\n2\n3"},
+		{4, 2, "2\n3\n4\n5"},
+	} {
+		t.Run(fmt.Sprintf("line:%d,nc:%d", test.line, test.nContext), func(t *testing.T) {
+			got, err := readSource(file, test.line, test.nContext)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if g, w := got, test.want; g != w {
+				t.Errorf("got\n%s\nwant\n%s", g, w)
+			}
+		})
+	}
+}