| // Copyright 2024 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package telemetry |
| |
| import ( |
| "fmt" |
| "log" |
| "os" |
| "os/exec" |
| "path/filepath" |
| "sync" |
| "time" |
| |
| "golang.org/x/sync/errgroup" |
| "golang.org/x/telemetry/counter" |
| "golang.org/x/telemetry/internal/crashmonitor" |
| "golang.org/x/telemetry/internal/telemetry" |
| "golang.org/x/telemetry/internal/upload" |
| ) |
| |
// Config holds the options accepted by [Start].
type Config struct {
	// ReportCrashes turns on crash reporting when true.
	//
	// Crash reporting relies on the [debug.SetCrashOutput] mechanism,
	// which is a process-wide resource, so the application must not make
	// its own calls to that function.
	//
	// ReportCrashes has no effect unless the program is built with go1.23+.
	ReportCrashes bool

	// Upload, when true, makes this program periodically upload approved
	// counters from the local telemetry database to telemetry.go.dev.
	//
	// Nothing is uploaded unless the user has consented to data
	// collection, for example by running cmd/gotelemetry or by
	// affirming the gopls dialog.
	//
	// (Only gopls is expected to use this feature. In the longer term,
	// the go command may become the sole program responsible for
	// uploading.)
	Upload bool

	// TelemetryDir, when non-empty, names an alternate telemetry
	// directory to write data to; when empty, the default directory
	// is used.
	// It is intended for isolating testing environments.
	TelemetryDir string

	// UploadStartTime, when set, overrides the upload start time: the
	// time the upload logic uses to decide whether counter file data
	// should be uploaded. Only counter files that expired before the
	// start time are considered for upload.
	//
	// Setting it can simulate a future upload that collects recently
	// modified counters.
	UploadStartTime time.Time

	// UploadURL, when non-empty, overrides the URL that receives
	// uploaded reports. The default is https://telemetry.go.dev/upload.
	UploadURL string
}
| |
| // Start initializes telemetry using the specified configuration. |
| // |
| // Start opens the local telemetry database so that counter increment |
| // operations are durably recorded in the local file system. |
| // |
| // If [Config.Upload] is set, and the user has opted in to telemetry |
| // uploading, this process may attempt to upload approved counters |
| // to telemetry.go.dev. |
| // |
| // If [Config.ReportCrashes] is set, any fatal crash will be |
| // recorded by incrementing a counter named for the stack of the |
| // first running goroutine in the traceback. |
| // |
| // If either of these flags is set, Start re-executes the current |
| // executable as a child process, in a special mode in which it |
| // acts as a telemetry sidecar for the parent process (the application). |
| // In that mode, the call to Start will never return, so Start must |
| // be called immediately within main, even before such things as |
| // inspecting the command line. The application should avoid expensive |
| // steps or external side effects in init functions, as they will |
| // be executed twice (parent and child). |
| // |
| // Start returns a StartResult, which may be awaited via [StartResult.Wait] to |
| // wait for all work done by Start to complete. |
| func Start(config Config) *StartResult { |
| switch v := os.Getenv(telemetryChildVar); v { |
| case "": |
| // The subprocess started by parent has GO_TELEMETRY_CHILD=1. |
| return parent(config) |
| case "1": |
| child(config) // child will exit the process when it's done. |
| case "2": |
| // Do nothing: this was executed directly or indirectly by a child. |
| default: |
| log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v) |
| } |
| |
| return &StartResult{} |
| } |
| |
| // MaybeChild executes the telemetry child logic if the calling program is |
| // the telemetry child process, and does nothing otherwise. It is meant to be |
| // called as the first thing in a program that uses telemetry.Start but cannot |
| // call telemetry.Start immediately when it starts. |
| func MaybeChild(config Config) { |
| if v := os.Getenv(telemetryChildVar); v == "1" { |
| child(config) // child will exit the process when it's done. |
| } |
| // other values of the telemetryChildVar environment variable |
| // will be handled by telemetry.Start. |
| } |
| |
// A StartResult is a handle to the outcome of a call to [Start]. Use
// [StartResult.Wait] to block until all work done on behalf of Start
// has completed.
type StartResult struct {
	wg sync.WaitGroup // tracks background work started by Start
}

// Wait blocks until all work initiated by [Start] has completed.
// Calling Wait on a nil *StartResult returns immediately.
func (res *StartResult) Wait() {
	if res != nil {
		res.wg.Wait()
	}
}
| |
// daemonize is a hook that startChild applies to the sidecar command
// before it is started. The default implementation does nothing.
// NOTE(review): presumably reassigned elsewhere (e.g. in platform-specific
// files) to detach the child; that is not visible in this file.
var daemonize = func(*exec.Cmd) {}
| |
// Environment variables through which the application (parent) and the
// telemetry child process coordinate.
const (
	// telemetryChildVar identifies the role of the current process:
	//   - "1" means this process is the telemetry child;
	//   - "2" means this process is a child of the child, and no
	//     further forking should occur.
	telemetryChildVar = "GO_TELEMETRY_CHILD"

	// telemetryUploadVar, when set to "1", signals that the parent has
	// acquired the upload token and that the child should attempt an
	// upload.
	telemetryUploadVar = "GO_TELEMETRY_CHILD_UPLOAD"
)
| |
| func parent(config Config) *StartResult { |
| if config.TelemetryDir != "" { |
| telemetry.Default = telemetry.NewDir(config.TelemetryDir) |
| } |
| result := new(StartResult) |
| |
| mode, _ := telemetry.Default.Mode() |
| if mode == "off" { |
| // Telemetry is turned off. Crash reporting doesn't work without telemetry |
| // at least set to "local". The upload process runs in both "on" and "local" modes. |
| // In local mode the upload process builds local reports but does not do the upload. |
| return result |
| } |
| |
| counter.Open() |
| |
| if _, err := os.Stat(telemetry.Default.LocalDir()); err != nil { |
| // There was a problem statting LocalDir, which is needed for both |
| // crash monitoring and counter uploading. Most likely, there was an |
| // error creating telemetry.LocalDir in the counter.Open call above. |
| // Don't start the child. |
| return result |
| } |
| |
| childShouldUpload := config.Upload && acquireUploadToken() |
| reportCrashes := config.ReportCrashes && crashmonitor.Supported() |
| |
| if reportCrashes || childShouldUpload { |
| startChild(reportCrashes, childShouldUpload, result) |
| } |
| |
| return result |
| } |
| |
| func startChild(reportCrashes, upload bool, result *StartResult) { |
| // This process is the application (parent). |
| // Fork+exec the telemetry child. |
| exe, err := os.Executable() |
| if err != nil { |
| // There was an error getting os.Executable. It's possible |
| // for this to happen on AIX if os.Args[0] is not an absolute |
| // path and we can't find os.Args[0] in PATH. |
| log.Printf("failed to start telemetry sidecar: os.Executable: %v", err) |
| return |
| } |
| cmd := exec.Command(exe, "** telemetry **") // this unused arg is just for ps(1) |
| daemonize(cmd) |
| cmd.Env = append(os.Environ(), telemetryChildVar+"=1") |
| if upload { |
| cmd.Env = append(cmd.Env, telemetryUploadVar+"=1") |
| } |
| cmd.Dir = telemetry.Default.LocalDir() |
| |
| // The child process must write to a log file, not |
| // the stderr file it inherited from the parent, as |
| // the child may outlive the parent but should not prolong |
| // the life of any pipes created (by the grandparent) |
| // to gather the output of the parent. |
| // |
| // By default, we discard the child process's stderr, |
| // but in line with the uploader, log to a file in debug |
| // only if that directory was created by the user. |
| fd, err := os.Stat(telemetry.Default.DebugDir()) |
| if err != nil { |
| if !os.IsNotExist(err) { |
| log.Printf("failed to stat debug directory: %v", err) |
| return |
| } |
| } else if fd.IsDir() { |
| // local/debug exists and is a directory. Set stderr to a log file path |
| // in local/debug. |
| childLogPath := filepath.Join(telemetry.Default.DebugDir(), "sidecar.log") |
| childLog, err := os.OpenFile(childLogPath, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0600) |
| if err != nil { |
| log.Printf("opening sidecar log file for child: %v", err) |
| return |
| } |
| defer childLog.Close() |
| cmd.Stderr = childLog |
| } |
| |
| var crashOutputFile *os.File |
| if reportCrashes { |
| pipe, err := cmd.StdinPipe() |
| if err != nil { |
| log.Printf("StdinPipe: %v", err) |
| return |
| } |
| |
| crashOutputFile = pipe.(*os.File) // (this conversion is safe) |
| } |
| |
| if err := cmd.Start(); err != nil { |
| // The child couldn't be started. Log the failure. |
| log.Printf("can't start telemetry child process: %v", err) |
| return |
| } |
| if reportCrashes { |
| crashmonitor.Parent(crashOutputFile) |
| } |
| result.wg.Add(1) |
| go func() { |
| cmd.Wait() // Release resources if cmd happens not to outlive this process. |
| result.wg.Done() |
| }() |
| } |
| |
| func child(config Config) { |
| log.SetPrefix(fmt.Sprintf("telemetry-sidecar (pid %v): ", os.Getpid())) |
| |
| if config.TelemetryDir != "" { |
| telemetry.Default = telemetry.NewDir(config.TelemetryDir) |
| } |
| |
| // golang/go#67211: be sure to set telemetryChildVar before running the |
| // child, because the child itself invokes the go command to download the |
| // upload config. If the telemetryChildVar variable is still set to "1", |
| // that delegated go command may think that it is itself a telemetry |
| // child. |
| // |
| // On the other hand, if telemetryChildVar were simply unset, then the |
| // delegated go commands would fork themselves recursively. Short-circuit |
| // this recursion. |
| os.Setenv(telemetryChildVar, "2") |
| upload := os.Getenv(telemetryUploadVar) == "1" |
| |
| reportCrashes := config.ReportCrashes && crashmonitor.Supported() |
| uploadStartTime := config.UploadStartTime |
| uploadURL := config.UploadURL |
| |
| // Start crashmonitoring and uploading depending on what's requested |
| // and wait for the longer running child to complete before exiting: |
| // if we collected a crash before the upload finished, wait for the |
| // upload to finish before exiting |
| var g errgroup.Group |
| |
| if reportCrashes { |
| g.Go(func() error { |
| crashmonitor.Child() |
| return nil |
| }) |
| } |
| if upload { |
| g.Go(func() error { |
| uploaderChild(uploadStartTime, uploadURL) |
| return nil |
| }) |
| } |
| g.Wait() |
| |
| os.Exit(0) |
| } |
| |
| func uploaderChild(asof time.Time, uploadURL string) { |
| if err := upload.Run(upload.RunConfig{ |
| UploadURL: uploadURL, |
| LogWriter: os.Stderr, |
| StartTime: asof, |
| }); err != nil { |
| log.Printf("upload failed: %v", err) |
| } |
| } |
| |
| // acquireUploadToken acquires a token permitting the caller to upload. |
| // To limit the frequency of uploads, only one token is issue per |
| // machine per time period. |
| // The boolean indicates whether the token was acquired. |
| func acquireUploadToken() bool { |
| if telemetry.Default.LocalDir() == "" { |
| // The telemetry dir wasn't initialized properly, probably because |
| // os.UserConfigDir did not complete successfully. In that case |
| // there are no counters to upload, so we should just do nothing. |
| return false |
| } |
| tokenfile := filepath.Join(telemetry.Default.LocalDir(), "upload.token") |
| const period = 24 * time.Hour |
| |
| // A process acquires a token by successfully creating a |
| // well-known file. If the file already exists and has an |
| // mtime age less then than the period, the process does |
| // not acquire the token. If the file is older than the |
| // period, the process is allowed to remove the file and |
| // try to re-create it. |
| fi, err := os.Stat(tokenfile) |
| if err == nil { |
| if time.Since(fi.ModTime()) < period { |
| return false |
| } |
| // There's a possible race here where two processes check the |
| // token file and see that it's older than the period, then the |
| // first one removes it and creates another, and then a second one |
| // removes the newly created file and creates yet another |
| // file. Then both processes would act as though they had the token. |
| // This is very rare, but it's also okay because we're only grabbing |
| // the token to do rate limiting, not for correctness. |
| _ = os.Remove(tokenfile) |
| } else if !os.IsNotExist(err) { |
| log.Printf("error acquiring upload taken: statting token file: %v", err) |
| return false |
| } |
| |
| f, err := os.OpenFile(tokenfile, os.O_CREATE|os.O_EXCL, 0666) |
| if err != nil { |
| if os.IsExist(err) { |
| return false |
| } |
| log.Printf("error acquiring upload token: creating token file: %v", err) |
| return false |
| } |
| _ = f.Close() |
| return true |
| } |