Skip to content

Commit

Permalink
telemetry: fix automated crash reporting using telemetry.Start
Browse files Browse the repository at this point in the history
Following CL 592017, the child process started by telemetry.Start no
longer opens the counter file, causing crash reports to be dropped on
the floor. Fix this, with a test.

Fixes golang/go#69681

Change-Id: I423533d5b1e2369f13880cecd9137cd7c9239240
Reviewed-on: https://go-review.googlesource.com/c/telemetry/+/616396
Reviewed-by: Alan Donovan <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Auto-Submit: Robert Findley <[email protected]>
  • Loading branch information
findleyr authored and gopherbot committed Sep 27, 2024
1 parent 1b7b43a commit 1967543
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 17 deletions.
3 changes: 3 additions & 0 deletions start.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ func child(config Config) {
uploadStartTime := config.UploadStartTime
uploadURL := config.UploadURL

// The crashmonitor and/or upload process may themselves record counters.
counter.Open()

// Start crashmonitoring and uploading depending on what's requested
// and wait for the longer running child to complete before exiting:
// if we collected a crash before the upload finished, wait for the
Expand Down
63 changes: 46 additions & 17 deletions start_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"golang.org/x/telemetry/counter/countertest"
"golang.org/x/telemetry/internal/configtest"
ic "golang.org/x/telemetry/internal/counter"
"golang.org/x/telemetry/internal/crashmonitor"
"golang.org/x/telemetry/internal/regtest"
it "golang.org/x/telemetry/internal/telemetry"
"golang.org/x/telemetry/internal/testenv"
Expand Down Expand Up @@ -48,7 +49,6 @@ func TestMain(m *testing.M) {
// runProg runs the given program.
// See the switch statement below.
func runProg(prog string) int {

mustGetEnv := func(envvar string) string {
v := os.Getenv(envvar)
if v == "" {
Expand All @@ -65,56 +65,80 @@ func runProg(prog string) int {

// Set global state.
ic.CounterTime = func() time.Time { return asof } // must be done before Open
countertest.Open(mustGetEnv(telemetryDirEnv))

telemetryDir := mustGetEnv(telemetryDirEnv)

switch prog {
case "setmode":
countertest.Open(telemetryDir)
// Use the modified time above for the asof time.
if err := it.Default.SetModeAsOf("on", asof); err != nil {
log.Fatalf("setting mode: %v", err)
}
case "inc":
countertest.Open(telemetryDir)
// (CounterTime is already set above)
counter.Inc("teststart/counter")

case "start":
case "crash":
telemetry.Start(telemetry.Config{
// Do not call Open, to assert that Start does it (golang/go#69681).
TelemetryDir: telemetryDir,
ReportCrashes: true,
})
// Note: don't await the result of start: the child process won't complete
// until the crash occurs.
panic("crash!")

case "upload":
res := telemetry.Start(telemetry.Config{
// No need to set TelemetryDir since the Default dir is already set by countertest.Open.
TelemetryDir: telemetryDir,
Upload: true,
UploadURL: mustGetEnv(uploadURLEnv),
UploadStartTime: asof,
})
res.Wait()

default:
log.Fatalf("unknown program %q", prog)
}
return 0
}

func execProg(t *testing.T, telemetryDir, prog string, asof time.Time, env ...string) {
func execProg(t *testing.T, telemetryDir, prog string, asof time.Time, expectFailure bool, env ...string) {
// Run the runStart function below, via a fork+exec trick.
exe, err := os.Executable()
if err != nil {
t.Error(err)
return
}
cmd := exec.Command(exe, "** TestStart **") // this unused arg is just for ps(1)
cmd.Stderr = os.Stderr
if !expectFailure {
cmd.Stderr = os.Stderr
}
cmd.Env = os.Environ()
cmd.Env = append(cmd.Env, asofEnv+"="+asof.Format(it.DateOnly))
cmd.Env = append(cmd.Env, telemetryDirEnv+"="+telemetryDir)
cmd.Env = append(cmd.Env, runStartEnv+"="+prog) // see TestMain
cmd.Env = append(cmd.Env, env...)
out, err := cmd.Output()
if err != nil {
t.Errorf("program failed unexpectedly (%v)\n%s", err, out)
if expectFailure {
if err == nil {
t.Errorf("program succeeded unexpectedly:\n%s", out)
}
} else if err != nil {
t.Errorf("program failed unexpectedly (%v):\n%s", err, out)
}
}

func TestStart(t *testing.T) {
testenv.SkipIfUnsupportedPlatform(t)
testenv.MustHaveExec(t)

if !crashmonitor.Supported() {
t.Skip("crashmonitor not supported")
}

// This test sets up a telemetry environment, and then runs a test program
// that increments a counter, and uploads via telemetry.Start.

Expand All @@ -127,21 +151,26 @@ func TestStart(t *testing.T) {
if err != nil {
t.Errorf("error reading body: %v", err)
} else {
if body := string(body); !strings.Contains(body, "teststart/counter") {
body := string(body)
if !strings.Contains(body, "teststart/counter") {
t.Errorf("upload does not contain \"teststart/counter\":\n%s", body)
}
if !strings.Contains(body, "crash/crash") {
t.Errorf("upload does not contain \"crash/crash\":\n%s", body)
}
}
}))
uploadEnv := []string{uploadURLEnv + "=" + server.URL}

uc := regtest.CreateTestUploadConfig(t, []string{"teststart/counter"}, nil)
uc := regtest.CreateTestUploadConfig(t, []string{"teststart/counter"}, []string{"crash/crash"})
uploadEnv = append(uploadEnv, configtest.LocalProxyEnv(t, uc, "v1.2.3")...)

// Script programs.
now := time.Now()
execProg(t, telemetryDir, "setmode", now.Add(-30*24*time.Hour)) // back-date telemetry acceptance
execProg(t, telemetryDir, "inc", now.Add(-8*24*time.Hour)) // increment the counter
execProg(t, telemetryDir, "start", now, uploadEnv...) // run start
execProg(t, telemetryDir, "setmode", now.Add(-30*24*time.Hour), false) // back-date telemetry acceptance
execProg(t, telemetryDir, "inc", now.Add(-8*24*time.Hour), false) // increment the counter
execProg(t, telemetryDir, "crash", now.Add(-8*24*time.Hour), true) // run start
execProg(t, telemetryDir, "upload", now, false, uploadEnv...) // run start

if !uploaded {
t.Fatalf("no upload occurred on %v", os.Getpid())
Expand Down Expand Up @@ -171,12 +200,12 @@ func TestConcurrentStart(t *testing.T) {
uploadEnv = append(uploadEnv, configtest.LocalProxyEnv(t, uc, "v1.2.3")...)

now := time.Now()
execProg(t, telemetryDir, "setmode", now.Add(-365*24*time.Hour)) // back-date telemetry acceptance
execProg(t, telemetryDir, "inc", now.Add(-8*24*time.Hour)) // increment the counter
execProg(t, telemetryDir, "setmode", now.Add(-365*24*time.Hour), false) // back-date telemetry acceptance
execProg(t, telemetryDir, "inc", now.Add(-8*24*time.Hour), false) // increment the counter

// Populate three weeks of counters to upload.
for i := -28; i < -7; i++ { // Populate three weeks of counters to upload.
execProg(t, telemetryDir, "inc", now.Add(time.Duration(i)*24*time.Hour))
execProg(t, telemetryDir, "inc", now.Add(time.Duration(i)*24*time.Hour), false)
}

// Run start concurrently.
Expand All @@ -185,7 +214,7 @@ func TestConcurrentStart(t *testing.T) {
wg.Add(1)
go func() {
defer wg.Done()
execProg(t, telemetryDir, "start", now, uploadEnv...)
execProg(t, telemetryDir, "upload", now, false, uploadEnv...)
}()
}
wg.Wait()
Expand Down

0 comments on commit 1967543

Please sign in to comment.