Skip to content

Commit

Permalink
telemetry: acquire upload token before starting uploader child
Browse files Browse the repository at this point in the history
Instead of acquiring the upload token after the child is started,
acquire it before starting the child. Then, if the token isn't acquired
and crashmonitoring isn't requested, the child doesn't need to be
started at all. The GO_TELEMETRY_UPLOAD environment variable is set to
indicate to the child that uploading was requested. We need to set the
environment variable (or add some other way to communicate between the
parent and child such as a command line argument) because if
crashmonitoring was also requested the child would have no way of
knowing if the upload token was acquired.

The X_TELEMETRY_CHILD environment variable is renamed to
GO_TELEMETRY_CHILD to make it clear the variable comes from the go
toolchain.

Fixes golang/go#67700

Change-Id: Ifc0b4ecd8e77711bef1fcb423c96a574aec3ae09
Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/589375
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Robert Findley <[email protected]>
  • Loading branch information
matloob committed May 31, 2024
1 parent acd60de commit 4691165
Showing 1 changed file with 53 additions and 57 deletions.
110 changes: 53 additions & 57 deletions start.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ func Start(config Config) *StartResult {
mode, _ := telemetry.Default.Mode()
if mode == "off" {
// Telemetry is turned off. Crash reporting doesn't work without telemetry
// at least set to "local", and the uploader isn't started in uploaderChild if
// mode is "off"
// at least set to "local". The upload process runs in both "on" and "local" modes.
// In local mode the upload process builds local reports but does not do the upload.
return result
}

Expand All @@ -109,35 +109,35 @@ func Start(config Config) *StartResult {
return result
}

// Crash monitoring and uploading both require a sidecar process.
var (
reportCrashes = config.ReportCrashes && crashmonitor.Supported()
upload = config.Upload && mode != "off"
)
if reportCrashes || upload {
switch v := os.Getenv(telemetryChildVar); v {
case "":
// The subprocess started by parent has X_TELEMETRY_CHILD=1.
parent(reportCrashes, result)
case "1":
// golang/go#67211: be sure to set telemetryChildVar before running the
// child, because the child itself invokes the go command to download the
// upload config. If the telemetryChildVar variable is still set to "1",
// that delegated go command may think that it is itself a telemetry
// child.
//
// On the other hand, if telemetryChildVar were simply unset, then the
// delegated go commands would fork themselves recursively. Short-circuit
// this recursion.
os.Setenv(telemetryChildVar, "2")
child(reportCrashes, upload, config.UploadStartTime, config.UploadURL)
os.Exit(0)
case "2":
// Do nothing: see note above.
default:
log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v)
var reportCrashes = config.ReportCrashes && crashmonitor.Supported()

switch v := os.Getenv(telemetryChildVar); v {
case "":
// The subprocess started by parent has GO_TELEMETRY_CHILD=1.
childShouldUpload := config.Upload && acquireUploadToken()
if reportCrashes || childShouldUpload {
parent(reportCrashes, childShouldUpload, result)
}
case "1":
// golang/go#67211: be sure to set telemetryChildVar before running the
// child, because the child itself invokes the go command to download the
// upload config. If the telemetryChildVar variable is still set to "1",
// that delegated go command may think that it is itself a telemetry
// child.
//
// On the other hand, if telemetryChildVar were simply unset, then the
// delegated go commands would fork themselves recursively. Short-circuit
// this recursion.
os.Setenv(telemetryChildVar, "2")
upload := os.Getenv(telemetryUploadVar) == "1"
child(reportCrashes, upload, config.UploadStartTime, config.UploadURL)
os.Exit(0)
case "2":
// Do nothing: see note above.
default:
log.Fatalf("unexpected value for %q: %q", telemetryChildVar, v)
}

return result
}

Expand All @@ -163,9 +163,13 @@ var daemonize = func(cmd *exec.Cmd) {}
//
// If telemetryChildVar is set to "2", this is a child of the child, and no
// further forking should occur.
const telemetryChildVar = "X_TELEMETRY_CHILD"
const telemetryChildVar = "GO_TELEMETRY_CHILD"

// If telemetryUploadVar is set to "1" in the environment, the upload token has been
// acquired by the parent, and the child should attempt an upload.
const telemetryUploadVar = "GO_TELEMETRY_CHILD_UPLOAD"

func parent(reportCrashes bool, result *StartResult) {
func parent(reportCrashes, upload bool, result *StartResult) {
// This process is the application (parent).
// Fork+exec the telemetry child.
exe, err := os.Executable()
Expand All @@ -179,6 +183,9 @@ func parent(reportCrashes bool, result *StartResult) {
cmd := exec.Command(exe, "** telemetry **") // this unused arg is just for ps(1)
daemonize(cmd)
cmd.Env = append(os.Environ(), telemetryChildVar+"=1")
if upload {
cmd.Env = append(cmd.Env, telemetryUploadVar+"=1")
}
cmd.Dir = telemetry.Default.LocalDir()

// The child process must write to a log file, not
Expand Down Expand Up @@ -251,26 +258,6 @@ func child(reportCrashes, upload bool, uploadStartTime time.Time, uploadURL stri
}

func uploaderChild(asof time.Time, uploadURL string) {
if mode, _ := telemetry.Default.Mode(); mode == "off" {
// There's no work to be done if telemetry is turned off.
return
}
if telemetry.Default.LocalDir() == "" {
// The telemetry dir wasn't initialized properly, probably because
// os.UserConfigDir did not complete successfully. In that case
// there are no counters to upload, so we should just do nothing.
return
}
tokenfilepath := filepath.Join(telemetry.Default.LocalDir(), "upload.token")
ok, err := acquireUploadToken(tokenfilepath)
if err != nil {
log.Printf("error acquiring upload token: %v", err)
return
} else if !ok {
// It hasn't been a day since the last upload.Run attempt or there's
// a concurrently running uploader.
return
}
if err := upload.Run(upload.RunConfig{
UploadURL: uploadURL,
LogWriter: os.Stderr,
Expand All @@ -284,7 +271,14 @@ func uploaderChild(asof time.Time, uploadURL string) {
// To limit the frequency of uploads, only one token is issue per
// machine per time period.
// The boolean indicates whether the token was acquired.
func acquireUploadToken(tokenfile string) (bool, error) {
func acquireUploadToken() bool {
if telemetry.Default.LocalDir() == "" {
// The telemetry dir wasn't initialized properly, probably because
// os.UserConfigDir did not complete successfully. In that case
// there are no counters to upload, so we should just do nothing.
return false
}
tokenfile := filepath.Join(telemetry.Default.LocalDir(), "upload.token")
const period = 24 * time.Hour

// A process acquires a token by successfully creating a
Expand All @@ -296,7 +290,7 @@ func acquireUploadToken(tokenfile string) (bool, error) {
fi, err := os.Stat(tokenfile)
if err == nil {
if time.Since(fi.ModTime()) < period {
return false, nil
return false
}
// There's a possible race here where two processes check the
// token file and see that it's older than the period, then the
Expand All @@ -307,16 +301,18 @@ func acquireUploadToken(tokenfile string) (bool, error) {
// the token to do rate limiting, not for correctness.
_ = os.Remove(tokenfile)
} else if !os.IsNotExist(err) {
return false, fmt.Errorf("statting token file: %v", err)
log.Printf("error acquiring upload taken: statting token file: %v", err)
return false
}

f, err := os.OpenFile(tokenfile, os.O_CREATE|os.O_EXCL, 0666)
if err != nil {
if os.IsExist(err) {
return false, nil
return false
}
return false, fmt.Errorf("creating token file: %v", err)
log.Printf("error acquiring upload token: creating token file: %v", err)
return false
}
_ = f.Close()
return true, nil
return true
}

0 comments on commit 4691165

Please sign in to comment.