terraform: add alert for build failures

Add a log-based metric that looks for failures in Cloud Build logs,
and an alert that fires if that metric is positive.

This is not the official way to be notified about Cloud Build
events. You're supposed to listen to the pubsub channel it writes to,
but that involves setting up a server or Cloud Function that then
sends email.

Instead, the metric looks for the line "ERROR" in the logs. I observed
that this line occurs when a build fails. But it might not be totally
reliable; we should keep an eye on it.

Change-Id: I5630fa7d6f6444fc835f1dfe4b94d72a905af6df
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/473168
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/terraform/main.tf b/terraform/main.tf
index b4ca55a..55caeda 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -157,6 +157,19 @@
   }
 }
 
+# Log-based metric that counts Cloud Build log entries whose text payload is
+# "ERROR". The Cloud Build failure alert policy reads this metric by name.
+resource "google_logging_metric" "build_errors" {
+  name        = "cloud-build-errors"
+  description = "Errors from Cloud Build"
+  filter      = "resource.type=build AND textPayload=ERROR"
+  metric_descriptor {
+    value_type  = "INT64"
+    metric_kind = "DELTA"
+    unit        = "1"
+  }
+}
+
 resource "google_monitoring_notification_channel" "email" {
   display_name = "Go Ecosystem Team Alerts"
   type         = "email"
@@ -194,6 +207,36 @@
 
 }
 
+# Email the team when the cloud-build-errors log metric is positive in a window.
+# NOTE(review): metric logs are resource.type=build; confirm "audited_resource".
+resource "google_monitoring_alert_policy" "build_job_failing" {
+  display_name = "Cloud Build Job Failing"
+
+  conditions {
+    display_name = "Cloud Build Error Count"
+
+    condition_threshold {
+      filter          = <<-EOT
+        metric.type="logging.googleapis.com/user/cloud-build-errors"
+        resource.type="audited_resource"
+      EOT
+      comparison      = "COMPARISON_GT"
+      threshold_value = 0 # strictly greater than 0, so a single error fires
+      aggregations {
+        alignment_period     = "600s"
+        cross_series_reducer = "REDUCE_SUM"
+        per_series_aligner   = "ALIGN_DELTA"
+      }
+      duration = "0s"
+      trigger { count = 1 }
+    }
+  }
+
+  combiner = "OR"
+
+  notification_channels = [google_monitoring_notification_channel.email.name]
+}
+
 # Cloud Build trigger to deploy the prod worker on every push to master.
 resource "google_cloudbuild_trigger" "deploy_prod_worker" {
   name = "Deploy-Prod-Ecosystem-Worker"