internal/bigquery: silence already-exists error logs

We silently handle dataset already-exists error, but the log error
messages keep getting generated. This can be confusing during debugging.
This CL silences these messages by explicitly checking if the dataset
exists and then exiting early.

Also, this CL makes explicit that the biquery API calls the worker is
making always either create dataset/table or update them.

Change-Id: Ic534026c65c5d67793c13aeb642adaf40bbfe1db
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/524115
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/internal/bigquery/bigquery.go b/internal/bigquery/bigquery.go
index 238b32e..9b43340 100644
--- a/internal/bigquery/bigquery.go
+++ b/internal/bigquery/bigquery.go
@@ -30,15 +30,23 @@
 	deleteDatasetOnClose bool
 }
 
-// NewClient creates a new client for connecting to BigQuery, referring to a single dataset.
-// The dataset must already exist.
-func NewClient(ctx context.Context, projectID, datasetID string) (_ *Client, err error) {
+// NewClientCreate creates a new client for connecting to BigQuery, referring
+// to a single dataset. It creates the dataset if it doesn't exist.
+func NewClientCreate(ctx context.Context, projectID, datasetID string) (_ *Client, err error) {
+	if err := CreateDataset(ctx, projectID, datasetID); err != nil {
+		return nil, err
+	}
+	return newClient(ctx, projectID, datasetID)
+}
+
+func newClient(ctx context.Context, projectID, datasetID string) (_ *Client, err error) {
 	defer derrors.Wrap(&err, "New(ctx, %q, %q)", projectID, datasetID)
 	client, err := bq.NewClient(ctx, projectID)
 	if err != nil {
 		return nil, err
 	}
 	dataset := client.DatasetInProject(projectID, datasetID)
+	// Check that the dataset exists and is accessible.
 	if _, err := dataset.Metadata(ctx); err != nil {
 		return nil, err
 	}
@@ -48,14 +56,6 @@
 	}, nil
 }
 
-// NewClientCreate is like NewClient, but it creates the dataset if it doesn't exist.
-func NewClientCreate(ctx context.Context, projectID, datasetID string) (_ *Client, err error) {
-	if err := CreateDataset(ctx, projectID, datasetID); err != nil {
-		return nil, err
-	}
-	return NewClient(ctx, projectID, datasetID)
-}
-
 func (c *Client) Close() (err error) {
 	if c.deleteDatasetOnClose {
 		err = c.dataset.DeleteWithContents(context.Background())
@@ -82,8 +82,13 @@
 		return err
 	}
 	dataset := client.DatasetInProject(projectID, datasetID)
+	// If the dataset exists, do not try to create it. This will
+	// avoid generating confusing error messages in logs.
+	if _, err := dataset.Metadata(ctx); err == nil || !isNotFoundError(err) {
+		return nil
+	}
 	err = dataset.Create(ctx, &bq.DatasetMetadata{Name: datasetID})
-	if err != nil && !isAlreadyExistsError(err) {
+	if err != nil && !isAlreadyExistsError(err) { // for sanity, check for already-exists error
 		return err
 	}
 	return nil
@@ -125,41 +130,29 @@
 	return fmt.Sprintf("%s.%s.%s", c.dataset.ProjectID, c.dataset.DatasetID, tableID)
 }
 
-// CreateTable creates a table with the given name if it doesn't exist.
-func (c *Client) CreateTable(ctx context.Context, tableID string) (err error) {
-	defer derrors.Wrap(&err, "CreateTable(%q)", tableID)
-	schema := TableSchema(tableID)
-	if schema == nil {
-		return fmt.Errorf("no schema registered for table %q", tableID)
-	}
-	err = c.Table(tableID).Create(ctx, &bq.TableMetadata{Schema: schema})
-	if err != nil && !isAlreadyExistsError(err) {
-		return err
-	}
-	return nil
-}
-
 // CreateOrUpdateTable creates a table if it does not exist, or updates it if it does.
 // It returns true if it created the table.
 func (c *Client) CreateOrUpdateTable(ctx context.Context, tableID string) (created bool, err error) {
 	defer derrors.Wrap(&err, "CreateOrUpdateTable(%q)", tableID)
+	schema := TableSchema(tableID)
+	if schema == nil {
+		return false, fmt.Errorf("no schema registered for table %q", tableID)
+	}
+
 	meta, err := c.Table(tableID).Metadata(ctx) // check if the table already exists
 	if err != nil {
 		if !isNotFoundError(err) {
 			return false, err
 		}
-		return true, c.CreateTable(ctx, tableID)
+		return true, c.Table(tableID).Create(ctx, &bq.TableMetadata{Schema: schema})
 	}
-	schema := TableSchema(tableID)
-	if schema == nil {
-		return false, fmt.Errorf("no schema registered for table %q", tableID)
-	}
+
 	_, err = c.Table(tableID).Update(ctx, bq.TableMetadataToUpdate{Schema: schema}, meta.ETag)
 	// There is a race condition if multiple threads of control call this function concurrently:
 	// The table may have changed since Metadata was called above, making the Etag invalid and
 	// resulting in a PreconditionFailed error. This error is harmless: it just means that someone
 	// else updated the table before us. Ignore it.
-	if isAlreadyExistsError(err) || hasCode(err, http.StatusPreconditionFailed) {
+	if isAlreadyExistsError(err) || hasCode(err, http.StatusPreconditionFailed) { // for sanity, check for already-exists error
 		return false, nil
 	}
 	return false, err
@@ -310,7 +303,7 @@
 	tables  = map[string]bq.Schema{}
 )
 
-// AddTable records the schema for a table, so CreateTable just needs the name.
+// AddTable records the schema for a table, so table creation just needs the name.
 func AddTable(tableID string, s bq.Schema) {
 	tableMu.Lock()
 	defer tableMu.Unlock()
diff --git a/internal/vulndb/vulndb_test.go b/internal/vulndb/vulndb_test.go
index d2c15c9..c76edcd 100644
--- a/internal/vulndb/vulndb_test.go
+++ b/internal/vulndb/vulndb_test.go
@@ -54,7 +54,7 @@
 	defer client.Close()
 
 	writeToBigQuery := func(es []*Entry) {
-		if err := client.CreateTable(ctx, TableName); err != nil {
+		if _, err := client.CreateOrUpdateTable(ctx, TableName); err != nil {
 			t.Fatal(err)
 		}
 		if err := bigquery.UploadMany(ctx, client, TableName, es, 0); err != nil {
diff --git a/internal/vulndbreqs/bq.go b/internal/vulndbreqs/bq.go
index 743d13d..664d5fa 100644
--- a/internal/vulndbreqs/bq.go
+++ b/internal/vulndbreqs/bq.go
@@ -58,13 +58,13 @@
 // writeToBigQuery writes request counts to BigQuery.
 func writeToBigQuery(ctx context.Context, client *bigquery.Client, rcs []*RequestCount, ircs []*IPRequestCount) (err error) {
 	defer derrors.Wrap(&err, "vulndbreqs.writeToBigQuery")
-	if err := client.CreateTable(ctx, RequestCountTableName); err != nil {
+	if _, err := client.CreateOrUpdateTable(ctx, RequestCountTableName); err != nil {
 		return err
 	}
 	if err := bigquery.UploadMany(ctx, client, RequestCountTableName, rcs, 0); err != nil {
 		return err
 	}
-	if err := client.CreateTable(ctx, IPRequestCountTableName); err != nil {
+	if _, err := client.CreateOrUpdateTable(ctx, IPRequestCountTableName); err != nil {
 		return err
 	}
 	return bigquery.UploadMany(ctx, client, IPRequestCountTableName, ircs, 0)