internal/postgres: upsert module data

When we insert module information, make sure we overwrite existing
rows. Previously, we specified that new data should be ignored if the
row already existed, which meant that we were adding rows but never
changing them. That didn't matter at one point, when we deleted a
module before re-inserting it, but we no longer delete, so we must
upsert.

We also fix a number of places where our test modules had duplicate
directories. The upserts failed for these.

Change-Id: I97465b11e4ea6cbb7835e883f36f098445207eba
Reviewed-on: https://team-review.git.corp.google.com/c/golang/discovery/+/766365
Reviewed-by: Julie Qiu <julieqiu@google.com>
diff --git a/internal/fetch/directory.go b/internal/fetch/directory.go
index 84bef17..a14364d 100644
--- a/internal/fetch/directory.go
+++ b/internal/fetch/directory.go
@@ -19,7 +19,6 @@
 	pkgs []*internal.LegacyPackage,
 	readmes []*internal.Readme,
 	d *licenses.Detector) []*internal.DirectoryNew {
-
 	pkgLookup := map[string]*internal.LegacyPackage{}
 	for _, pkg := range pkgs {
 		pkgLookup[pkg.Path] = pkg
diff --git a/internal/postgres/details_test.go b/internal/postgres/details_test.go
index b4b4ce2..e2be719 100644
--- a/internal/postgres/details_test.go
+++ b/internal/postgres/details_test.go
@@ -396,6 +396,7 @@
 	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 	defer cancel()
 
+	testModule.Licenses = nil
 	for _, p := range testModule.LegacyPackages {
 		testModule.Licenses = append(testModule.Licenses, &licenses.License{
 			Metadata: p.Licenses[0],
diff --git a/internal/postgres/directory_test.go b/internal/postgres/directory_test.go
index 544dde2..68895a0 100644
--- a/internal/postgres/directory_test.go
+++ b/internal/postgres/directory_test.go
@@ -294,18 +294,16 @@
 
 	// Add a module that has READMEs in a directory and a package.
 	m := sample.Module("a.com/m", "v1.2.3", "dir/p")
-	d := sample.DirectoryNewEmpty("a.com/m/dir")
+	d := findDirectory(m, "a.com/m/dir")
 	d.Readme = &internal.Readme{
 		Filepath: "DIR_README.md",
 		Contents: "dir readme",
 	}
-	m.Directories = append(m.Directories, d)
-	d = sample.DirectoryNewEmpty("a.com/m/dir/p")
+	d = findDirectory(m, "a.com/m/dir/p")
 	d.Readme = &internal.Readme{
 		Filepath: "PKG_README.md",
 		Contents: "pkg readme",
 	}
-	m.Directories = append(m.Directories, d)
 	if err := testDB.InsertModule(ctx, m); err != nil {
 		t.Fatal(err)
 	}
@@ -443,6 +441,15 @@
 	}
 }
 
+func findDirectory(m *internal.Module, path string) *internal.DirectoryNew {
+	for _, d := range m.Directories {
+		if d.Path == path {
+			return d
+		}
+	}
+	return nil
+}
+
 func TestGetDirectoryFieldSet(t *testing.T) {
 	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 	defer cancel()
diff --git a/internal/postgres/insert_module.go b/internal/postgres/insert_module.go
index 4dc6f3a..5a7a862 100644
--- a/internal/postgres/insert_module.go
+++ b/internal/postgres/insert_module.go
@@ -237,8 +237,8 @@
 			"coverage",
 			"module_id",
 		}
-		return db.BulkInsert(ctx, "licenses", licenseCols, licenseValues,
-			database.OnConflictDoNothing)
+		return db.BulkUpsert(ctx, "licenses", licenseCols, licenseValues,
+			[]string{"module_path", "version", "file_path"})
 	}
 	return nil
 }
@@ -304,6 +304,7 @@
 		}
 	}
 	if len(pkgValues) > 0 {
+		uniqueCols := []string{"path", "module_path", "version"}
 		pkgCols := []string{
 			"path",
 			"synopsis",
@@ -319,7 +320,7 @@
 			"goarch",
 			"commit_time",
 		}
-		if err := db.BulkInsert(ctx, "packages", pkgCols, pkgValues, database.OnConflictDoNothing); err != nil {
+		if err := db.BulkUpsert(ctx, "packages", pkgCols, pkgValues, uniqueCols); err != nil {
 			return err
 		}
 	}
@@ -331,7 +332,7 @@
 			"from_version",
 			"to_path",
 		}
-		if err := db.BulkInsert(ctx, "imports", importCols, importValues, database.OnConflictDoNothing); err != nil {
+		if err := db.BulkUpsert(ctx, "imports", importCols, importValues, importCols); err != nil {
 			return err
 		}
 	}
@@ -363,7 +364,7 @@
 		return nil
 	}
 	cols := []string{"from_path", "from_module_path", "to_path"}
-	return tx.BulkInsert(ctx, "imports_unique", cols, values, database.OnConflictDoNothing)
+	return tx.BulkUpsert(ctx, "imports_unique", cols, values, cols)
 }
 
 func insertDirectories(ctx context.Context, db *database.DB, m *internal.Module, moduleID int) (err error) {
@@ -451,7 +452,9 @@
 		}
 		log.Debugf(ctx, "memory before inserting into paths: %dM", allocMeg())
 
-		if err := db.BulkInsertReturning(ctx, "paths", pathCols, pathValues, database.OnConflictDoNothing, []string{"id", "path"}, func(rows *sql.Rows) error {
+		uniqueCols := []string{"path", "module_id"}
+		returningCols := []string{"id", "path"}
+		if err := db.BulkUpsertReturning(ctx, "paths", pathCols, pathValues, uniqueCols, returningCols, func(rows *sql.Rows) error {
 			var (
 				pathID int
 				path   string
@@ -484,7 +487,7 @@
 			readmeValues = append(readmeValues, id, readme.Filepath, makeValidUnicode(readme.Contents))
 		}
 		readmeCols := []string{"path_id", "file_path", "contents"}
-		if err := db.BulkInsert(ctx, "readmes", readmeCols, readmeValues, database.OnConflictDoNothing); err != nil {
+		if err := db.BulkUpsert(ctx, "readmes", readmeCols, readmeValues, []string{"path_id"}); err != nil {
 			return err
 		}
 	}
@@ -500,14 +503,9 @@
 			id := pathToID[path]
 			docValues = append(docValues, id, doc.GOOS, doc.GOARCH, doc.Synopsis, makeValidUnicode(doc.HTML))
 		}
-		docCols := []string{
-			"path_id",
-			"goos",
-			"goarch",
-			"synopsis",
-			"html",
-		}
-		if err := db.BulkInsert(ctx, "documentation", docCols, docValues, database.OnConflictDoNothing); err != nil {
+		uniqueCols := []string{"path_id", "goos", "goarch"}
+		docCols := append(uniqueCols, "synopsis", "html")
+		if err := db.BulkUpsert(ctx, "documentation", docCols, docValues, uniqueCols); err != nil {
 			return err
 		}
 	}
@@ -525,7 +523,7 @@
 		}
 	}
 	importCols := []string{"path_id", "to_path"}
-	return db.BulkInsert(ctx, "package_imports", importCols, importValues, database.OnConflictDoNothing)
+	return db.BulkUpsert(ctx, "package_imports", importCols, importValues, importCols)
 }
 
 // lock obtains an exclusive, transaction-scoped advisory lock on modulePath.
diff --git a/internal/postgres/insert_module_test.go b/internal/postgres/insert_module_test.go
index 7e9f01f..8dde0ec 100644
--- a/internal/postgres/insert_module_test.go
+++ b/internal/postgres/insert_module_test.go
@@ -42,13 +42,8 @@
 			module: sample.DefaultModule(),
 		},
 		{
-			name: "valid test with internal package",
-			module: func() *internal.Module {
-				m := sample.Module(sample.ModulePath, sample.VersionString, "internal/ffoo")
-				m.Directories = append(m.Directories,
-					sample.DirectoryNewEmpty(sample.ModulePath+"/internal"))
-				return m
-			}(),
+			name:   "valid test with internal package",
+			module: sample.Module(sample.ModulePath, sample.VersionString, "internal/ffoo"),
 		},
 		{
 			name: "valid test with go.mod missing",
diff --git a/internal/testing/sample/sample.go b/internal/testing/sample/sample.go
index e3d5bc1..be482f4 100644
--- a/internal/testing/sample/sample.go
+++ b/internal/testing/sample/sample.go
@@ -146,12 +146,23 @@
 		LegacyModuleInfo: *mi,
 		LegacyPackages:   nil,
 		Licenses:         Licenses,
-		Directories: []*internal.DirectoryNew{
-			DirectoryNewForModuleRoot(mi, LicenseMetadata),
-		},
+	}
+	emptySuffix := false
+	for _, s := range suffixes {
+		if s == "" {
+			emptySuffix = true
+			break
+		}
+	}
+	if emptySuffix {
+		AddPackage(m, LegacyPackage(modulePath, ""))
+	} else {
+		m.Directories = []*internal.DirectoryNew{DirectoryNewForModuleRoot(mi, LicenseMetadata)}
 	}
 	for _, s := range suffixes {
-		AddPackage(m, LegacyPackage(modulePath, s))
+		if s != "" {
+			AddPackage(m, LegacyPackage(modulePath, s))
+		}
 	}
 	return m
 }