blob: 92890ce46fcdb0f3986eab79e0eaa1e3d2d6ca44 [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package dcensus provides functionality for debug instrumentation.
package dcensus
import (
"context"
"fmt"
"net/http"
"strings"
"time"
"contrib.go.opencensus.io/exporter/prometheus"
"contrib.go.opencensus.io/exporter/stackdriver"
"go.opencensus.io/plugin/ochttp"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
"go.opencensus.io/trace"
"go.opencensus.io/zpages"
"golang.org/x/pkgsite/internal/config"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/log"
mrpb "google.golang.org/genproto/googleapis/api/monitoredres"
)
// KeyStatus is a tag key named "status".
var KeyStatus = tag.MustNewKey("status")
// RouteTagger is a func that can be used to derive a dynamic route tag for an
// incoming request.
type RouteTagger func(route string, r *http.Request) string
// Router is an http multiplexer that instruments per-handler debugging
// information and census instrumentation.
type Router struct {
http.Handler
mux *http.ServeMux
tagger RouteTagger
}
// NewRouter creates a new Router, using tagger to tag incoming requests in
// monitoring. If tagger is nil, a default route tagger is used.
func NewRouter(tagger RouteTagger) *Router {
if tagger == nil {
tagger = func(route string, r *http.Request) string {
return strings.Trim(route, "/")
}
}
mux := http.NewServeMux()
return &Router{
mux: mux,
Handler: &ochttp.Handler{Handler: mux},
tagger: tagger,
}
}
// Handle registers handler with the given route. It has the same routing
// semantics as http.ServeMux.
func (r *Router) Handle(route string, handler http.Handler) {
r.mux.HandleFunc(route, func(w http.ResponseWriter, req *http.Request) {
tag := r.tagger(route, req)
ochttp.WithRouteTag(handler, tag).ServeHTTP(w, req)
})
}
// HandleFunc is a wrapper around Handle for http.HandlerFuncs.
func (r *Router) HandleFunc(route string, handler http.HandlerFunc) {
r.Handle(route, handler)
}
const debugPage = `
<html>
<p><a href="/tracez">/tracez</a> - trace spans</p>
<p><a href="/statsz">/statz</a> - prometheus metrics page</p>
`
// Init configures tracing and aggregation according to the given Views. If
// running on GCP, Init also configures exporting to StackDriver.
func Init(cfg *config.Config, views ...*view.View) error {
// The default trace sampler samples with probability 1e-4. That's too
// infrequent for our traffic levels. In the future we may want to decrease
// this sampling rate.
trace.ApplyConfig(trace.Config{DefaultSampler: trace.ProbabilitySampler(0.01)})
if err := view.Register(views...); err != nil {
return fmt.Errorf("dcensus.Init(views): view.Register: %v", err)
}
exportToStackdriver(context.Background(), cfg)
return nil
}
// NewServer creates a new http.Handler for serving debug information.
func NewServer() (http.Handler, error) {
pe, err := prometheus.NewExporter(prometheus.Options{})
if err != nil {
return nil, fmt.Errorf("dcensus.NewServer: prometheus.NewExporter: %v", err)
}
mux := http.NewServeMux()
zpages.Handle(mux, "/")
mux.Handle("/statsz", pe)
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
fmt.Fprint(w, debugPage)
})
return mux, nil
}
// monitoredResource wraps a *mrpb.MonitoredResource to implement the
// monitoredresource.MonitoredResource interface.
type monitoredResource mrpb.MonitoredResource
func (r *monitoredResource) MonitoredResource() (resType string, labels map[string]string) {
return r.Type, r.Labels
}
// ExportToStackdriver checks to see if the process is running in a GCP
// environment, and if so configures exporting to stackdriver.
func exportToStackdriver(ctx context.Context, cfg *config.Config) {
if cfg.ProjectID == "" {
log.Infof(ctx, "Not exporting to StackDriver: GOOGLE_CLOUD_PROJECT is unset.")
return
}
// Report statistics every minutes, due to stackdriver limitations described at
// https://cloud.google.com/monitoring/custom-metrics/creating-metrics#writing-ts
view.SetReportingPeriod(time.Minute)
viewExporter, err := NewViewExporter(cfg)
if err != nil {
log.Fatalf(ctx, "error creating view exporter: %v", err)
}
view.RegisterExporter(viewExporter)
dte := &debugTraceExporter{}
// We want traces to be associated with the *app*, not the instance.
// TraceSpansBufferMaxBytes is increased from the default of 8MiB, though we
// can't increase *too* much because this is still running in GAE, which is
// relatively memory-constrained.
traceExporter, err := stackdriver.NewExporter(stackdriver.Options{
ProjectID: cfg.ProjectID,
MonitoredResource: (*monitoredResource)(cfg.MonitoredResource),
TraceSpansBufferMaxBytes: 32 * 1024 * 1024, // 32 MiB
DefaultMonitoringLabels: stackdriverLabels(cfg),
OnError: dte.onError,
})
if err != nil {
log.Fatalf(ctx, "error creating trace exporter: %v", err)
}
dte.exp = traceExporter
trace.RegisterExporter(dte)
}
// NewViewExporter creates a StackDriver exporter for stats.
func NewViewExporter(cfg *config.Config) (_ *stackdriver.Exporter, err error) {
defer derrors.Wrap(&err, "NewViewExporter()")
// Views must be associated with the instance, else we run into overlapping
// timeseries problems. Note that generic_task is used because the
// gae_instance resource type is not supported for metrics:
// https://cloud.google.com/monitoring/custom-metrics/creating-metrics#which-resource
mr := &monitoredResource{
Type: "generic_task",
Labels: map[string]string{
"project_id": cfg.ProjectID,
"location": cfg.LocationID,
"job": cfg.ServiceID,
"namespace": "go-discovery",
"task_id": cfg.InstanceID,
},
}
if cfg.OnGKE() {
mr = (*monitoredResource)(cfg.MonitoredResource)
}
log.Debugf(context.Background(), "monitored resource for monitoring: Type %q, Labels %v",
mr.Type, mr.Labels)
return stackdriver.NewExporter(stackdriver.Options{
ProjectID: cfg.ProjectID,
MonitoredResource: mr,
DefaultMonitoringLabels: stackdriverLabels(cfg),
OnError: func(err error) {
log.Warningf(context.Background(), "Stackdriver view exporter: %v", err)
},
})
}
func stackdriverLabels(cfg *config.Config) *stackdriver.Labels {
labels := &stackdriver.Labels{}
labels.Set("version", cfg.AppVersionLabel(), "Version label of the running binary")
labels.Set("env", cfg.DeploymentEnvironment(), "deployment environment")
labels.Set("app", cfg.Application(), "application name")
return labels
}
// MDur returns the latency value used for recording a measurement with
// opencensus.
func MDur(m *stats.Float64Measure, d time.Duration) stats.Measurement {
var v float64
switch m.Unit() {
case stats.UnitMilliseconds:
v = float64(d.Milliseconds())
case stats.UnitSeconds:
v = d.Seconds()
default:
panic(fmt.Sprintf("MDur: unsupported unit: %v", m))
}
return m.M(v)
}
// Customizations of ochttp views. Views are updated as follows:
// - ClientHost and ServerRoute are added to resp. client and server metrics.
// Since these are bounded cardinality in our metrics, they are useful to
// add additional context.
// - Method tags are removed. We don't have any routes that accept more than
// one HTTP method.
var (
ServerRequestCount = &view.View{
Name: "go-discovery/http/server/request_count",
Description: "Count of HTTP requests started by Method",
TagKeys: []tag.Key{ochttp.Method},
Measure: ochttp.ServerRequestCount,
Aggregation: view.Count(),
}
ServerResponseCount = &view.View{
Name: "go-discovery/http/server/response_count",
Description: "Server response count by status code and route",
TagKeys: []tag.Key{ochttp.StatusCode, ochttp.KeyServerRoute},
Measure: ochttp.ServerLatency,
Aggregation: view.Count(),
}
ServerLatency = &view.View{
Name: "go-discovery/http/server/response_latency",
Description: "Server response distribution by status code and route",
TagKeys: []tag.Key{ochttp.KeyServerRoute},
Measure: ochttp.ServerLatency,
Aggregation: ochttp.DefaultLatencyDistribution,
}
ServerResponseBytes = &view.View{
Name: "go-discovery/http/server/response_bytes",
Description: "Size distribution of HTTP response body",
TagKeys: []tag.Key{ochttp.KeyServerRoute},
Measure: ochttp.ServerResponseBytes,
Aggregation: ochttp.DefaultSizeDistribution,
}
ServerViews = []*view.View{
ServerRequestCount,
ServerResponseCount,
ServerLatency,
ServerResponseBytes,
}
)
// RecordWithTag is a convenience function for recording a single measurement with a single tag.
func RecordWithTag(ctx context.Context, key tag.Key, val string, m stats.Measurement) {
stats.RecordWithTags(ctx, []tag.Mutator{tag.Upsert(key, val)}, m)
}