terraform: add alerting for cloud scheduler job errors
Change-Id: I91919f9c34ef22ba11ee2573de99f835a95cd905
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/468939
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
diff --git a/.gitignore b/.gitignore
index 6ca0649..97fbbe5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
go-vulndb
config.json
docker-build
+vars.env
diff --git a/terraform/main.tf b/terraform/main.tf
index 4474b8a..a008bfe 100644
--- a/terraform/main.tf
+++ b/terraform/main.tf
@@ -146,6 +146,53 @@
}
}
+# Log-based counter metric: one data point per log entry emitted by a Cloud
+# Scheduler job at severity ERROR or higher.
+resource "google_logging_metric" "scheduler_errors" {
+  name        = "cloud-scheduler-errors"
+  description = "Number of errors from Cloud Scheduler jobs"
+
+  # Select only entries attached to a scheduler job whose severity is at
+  # least ERROR.
+  filter = "resource.type=cloud_scheduler_job AND severity>=ERROR"
+
+  # Integer DELTA counter with the dimensionless unit "1".
+  metric_descriptor {
+    value_type  = "INT64"
+    metric_kind = "DELTA"
+    unit        = "1"
+  }
+}
+
+# Email notification channel used to deliver monitoring alerts to the team
+# alias configured in labels.email_address.
+resource "google_monitoring_notification_channel" "email" {
+  type         = "email"
+  display_name = "Go Ecosystem Team Alerts"
+
+  labels = {
+    email_address = "go-ecosystem-team+alerts@google.com"
+  }
+}
+
+# Alert policy that notifies the email channel when the Cloud Scheduler
+# error-count metric crosses the threshold.
+resource "google_monitoring_alert_policy" "scheduler_job_failing" {
+  display_name = "Cloud Scheduler Job Failing"
+  combiner     = "OR"
+
+  conditions {
+    display_name = "Cloud Scheduler error count"
+
+    condition_threshold {
+      # The metric name is taken from the google_logging_metric resource rather
+      # than hard-coded, so Terraform creates the metric before this policy and
+      # the two cannot drift apart. The rendered filter text is unchanged.
+      #
+      # NOTE(review): the metric is built from logs with
+      # resource.type=cloud_scheduler_job; confirm that its time series are
+      # really reported under "audited_resource", otherwise this filter
+      # matches nothing.
+      filter = <<-EOT
+      metric.type="logging.googleapis.com/user/${google_logging_metric.scheduler_errors.name}"
+      resource.type="audited_resource"
+      EOT
+
+      # Strictly greater than 1: a window with exactly one error does not
+      # violate the condition.
+      # NOTE(review): confirm that tolerating a single error per window is
+      # intended.
+      comparison      = "COMPARISON_GT"
+      threshold_value = 1
+
+      # Sum the per-series deltas over 10-minute alignment windows.
+      aggregations {
+        alignment_period     = "600s"
+        per_series_aligner   = "ALIGN_DELTA"
+        cross_series_reducer = "REDUCE_SUM"
+      }
+
+      # No minimum violation duration; one violating series is enough.
+      duration = "0s"
+      trigger {
+        count = 1
+      }
+    }
+  }
+
+  notification_channels = [google_monitoring_notification_channel.email.name]
+}
# Deployment environments