From 929c451d8cc5e64927a98992e49df53d954817da Mon Sep 17 00:00:00 2001 From: Paul Gottschling Date: Thu, 7 Nov 2024 14:08:59 -0500 Subject: [PATCH] Add a workflow to check docs URLs Like the workflow to check for necessary redirects to match renamed or deleted docs pages, this workflow takes the path to a `gravitational/teleport` clone. It finds mentions of Teleport docs site paths in each file changed by a PR and ensures that each path has a corresponding docs page or redirect source. --- bot/go.mod | 7 +- bot/go.sum | 12 +- bot/internal/bot/doclinks.go | 158 ++++++++++++++++++++++++ bot/internal/bot/doclinks_test.go | 196 ++++++++++++++++++++++++++++++ bot/main.go | 2 + 5 files changed, 369 insertions(+), 6 deletions(-) create mode 100644 bot/internal/bot/doclinks.go create mode 100644 bot/internal/bot/doclinks_test.go diff --git a/bot/go.mod b/bot/go.mod index a49f74c3..969dcf3d 100644 --- a/bot/go.mod +++ b/bot/go.mod @@ -5,10 +5,13 @@ go 1.23 require ( github.com/google/go-github/v37 v37.0.0 github.com/gravitational/trace v1.3.1 + github.com/spf13/afero v1.11.0 github.com/stretchr/testify v1.8.4 - golang.org/x/oauth2 v0.13.0 + golang.org/x/oauth2 v0.15.0 ) +require golang.org/x/text v0.14.0 // indirect + require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/golang/protobuf v1.5.3 // indirect @@ -17,7 +20,7 @@ require ( github.com/pmezard/go-difflib v1.0.0 // indirect github.com/sirupsen/logrus v1.9.3 // indirect golang.org/x/crypto v0.17.0 // indirect - golang.org/x/net v0.17.0 // indirect + golang.org/x/net v0.19.0 // indirect golang.org/x/sys v0.15.0 // indirect golang.org/x/term v0.15.0 // indirect google.golang.org/appengine v1.6.8 // indirect diff --git a/bot/go.sum b/bot/go.sum index b3525eb9..4295a519 100644 --- a/bot/go.sum +++ b/bot/go.sum @@ -67,6 +67,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= +github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= @@ -99,12 +101,12 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= -golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= +golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= +golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= -golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= -golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0= +golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ= +golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -137,6 +139,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= diff --git a/bot/internal/bot/doclinks.go b/bot/internal/bot/doclinks.go new file mode 100644 index 00000000..c1594a22 --- /dev/null +++ b/bot/internal/bot/doclinks.go @@ -0,0 +1,158 @@ +package bot + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path" + "regexp" + "strings" + "text/template" + + "github.com/gravitational/trace" + "github.com/spf13/afero" +) + +type staleDocURL struct { + Text string + Line int +} + +// CheckDocsURLs opens each file changed by a pull request and checks whether +// any URL paths for the Teleport documentation site within the file are +// incorrect. An incorrect URL path is one that does not correspond to either: +// - a docs page in the docs/pages directory within teleportClonePath +// - the source of a redirect named in the docs configuration file at +// docs/config.json within teleportClonePath +func (b *Bot) CheckDocsURLs(ctx context.Context, teleportClonePath string) error { + if teleportClonePath == "" { + return trace.Wrap(errors.New("unable to load Teleport documentation config with an empty path")) + } + + docsConfigPath := path.Join(teleportClonePath, "docs", "config.json") + f, err := os.Open(docsConfigPath) + if err != nil { + return trace.Wrap(fmt.Errorf("unable to open docs config file at %v: %v", docsConfigPath, err)) + } + defer f.Close() + + var c DocsConfig + if err := json.NewDecoder(f).Decode(&c); err != nil { + return trace.Wrap(fmt.Errorf("unable to load redirect configuration from %v: %v", docsConfigPath, err)) + } + + files, err := b.c.GitHub.ListFiles(ctx, b.c.Environment.Organization, b.c.Environment.Repository, b.c.Environment.Number) + if err != nil { + return trace.Wrap(fmt.Errorf("unable to fetch files for PR %v: %v", b.c.Environment.Number, err)) + } + + data := make(staleDocsURLData) + for _, f := range files { + l, err := os.Open(f.Name) + if err != nil { + return trace.Wrap(fmt.Errorf("unable to open PR file %v: %v", f.Name, err)) + } + defer l.Close() + + stale := staleDocsURLs(afero.NewOsFs(), c.Redirects, l, teleportClonePath) + if len(stale) == 0 { + continue + } + data[f.Name] = stale + } + + if len(data) != 0 { + return trace.Wrap(fmt.Errorf("found incorrect documentation URLs in the following files:\n%v", data)) + } + + return nil +} + +type staleDocsURLData map[string][]staleDocURL + +const staleURLDataTempl = `{{ range $key, $val := . }}{{ $key }}: +{{ range $val }} - line {{ .Line }}: {{ .Text }} +{{ end }} +{{ end }}` + +func (s staleDocsURLData) String() string { + var buf bytes.Buffer + template.Must(template.New("").Parse(staleURLDataTempl)).Execute(&buf, s) + return buf.String() +} + +var docURLRegexp = regexp.MustCompile(`goteleport.com(/([a-zA-Z0-9\._-]+/?)+)`) + +// docURLPathToFilePath converts urlpath, which is a valid redirect source, to +// two possible docs page file paths that would correspond to urlpath, given +// Docusaurus' logic for generating routes for category index pages. +// +// For example, "/admin-guides/database-access/" would map to either +// "docs/pages/admin-guides/database-access/database-access.mdx" or +// "docs/pages/admin-guides/database-access.mdx", depending on whether +// database-access.mdx is a category index page. +// +// In the return value, standard page path comes first, followed by the category +// index page path. +func docURLPathToFilePath(urlpath string) (string, string) { + segs := strings.Split(strings.Trim(urlpath, "/"), "/") + short := docsPrefix + strings.Join(segs, "/") + ".mdx" + if len(segs) == 0 { + return short, short + } + long := docsPrefix + strings.Join(append(segs, segs[len(segs)-1]), "/") + ".mdx" + return short, long +} + +// staleDocsURLs examines file to detect stale docs site URLs. A style docs site +// URL is a URL of a docs page that neither corresponds to a file in fs nor a +// redirect source in conf. +// +// staleDocsURLs assumes that docs paths are in fs and relative to +// teleportClonePath. +func staleDocsURLs(fs afero.Fs, conf RedirectConfig, file io.Reader, teleportClonePath string) []staleDocURL { + sources := make(map[string]struct{}) + for _, s := range conf { + sources[s.Source] = struct{}{} + } + + res := []staleDocURL{} + sc := bufio.NewScanner(file) + sc.Split(bufio.ScanLines) + var line int + for sc.Scan() { + line++ + m := docURLRegexp.FindAllStringSubmatch(sc.Text(), -1) + for _, a := range m { + urlpath := a[1] + // Redirect sources include a trailing "/". + if !strings.HasSuffix(urlpath, "/") { + urlpath += "/" + } + + // While goteleport.com/docs URLs include the /docs/ + // path segment, redirect sources do not. + urlpath = strings.TrimPrefix(urlpath, "/docs") + + _, hasRedirect := sources[urlpath] + short, long := docURLPathToFilePath(urlpath) + _, err1 := fs.Stat(path.Join(teleportClonePath, short)) + _, err2 := fs.Stat(path.Join(teleportClonePath, long)) + if hasRedirect || err1 == nil || err2 == nil { + continue + } + + res = append(res, staleDocURL{ + Text: a[0], + Line: line, + }) + } + } + + return res +} diff --git a/bot/internal/bot/doclinks_test.go b/bot/internal/bot/doclinks_test.go new file mode 100644 index 00000000..9f7866cb --- /dev/null +++ b/bot/internal/bot/doclinks_test.go @@ -0,0 +1,196 @@ +package bot + +import ( + "path" + "strings" + "testing" + + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" +) + +func TestStaleDocsURLs(t *testing.T) { + cases := []struct { + description string + sourceFile string + docsFilenames []string + redirects RedirectConfig + expected []staleDocURL + }{ + { + description: "no errors, no redirects", + sourceFile: `https://www.goteleport.com/docs/enroll-databases`, + docsFilenames: []string{ + "docs/pages/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + { + description: "one error, no redirects", + sourceFile: `https://www.goteleport.com/docs/enroll-databases`, + docsFilenames: []string{ + "docs/pages/enroll-dbs.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{ + { + Text: "goteleport.com/docs/enroll-databases", + Line: 1, + }, + }, + }, + { + description: "no errors, one redirect", + sourceFile: `https://www.goteleport.com/docs/enroll-databases`, + docsFilenames: []string{ + "docs/pages/enroll-dbs.mdx", + }, + redirects: RedirectConfig{ + { + Source: "/enroll-databases/", + Destination: "/enroll-dbs/", + Permanent: true, + }, + }, + expected: []staleDocURL{}, + }, + { + description: "error on line 3", + sourceFile: `This is a paragraph. + +This is a link to a [docs page](https://www.goteleport.com/docs/enroll-databases).`, + docsFilenames: []string{ + "docs/pages/enroll-dbs.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{ + { + Text: "goteleport.com/docs/enroll-databases", + Line: 3, + }, + }, + }, + { + description: "no subdomain in docs link, no error", + sourceFile: `https://goteleport.com/docs/enroll-databases`, + docsFilenames: []string{ + "docs/pages/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + { + description: "query string in docs link, no error", + sourceFile: `https://www.goteleport.com/docs/enroll-databases?scope="enterprise"`, + docsFilenames: []string{ + "docs/pages/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + { + description: "fragment in docs link, no error", + sourceFile: `https://www.goteleport.com/docs/enroll-databases#step14-install-teleport`, + docsFilenames: []string{ + "docs/pages/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + { + description: "trailing slash in docs link, no error", + sourceFile: `https://www.goteleport.com/docs/enroll-databases/`, + docsFilenames: []string{ + "docs/pages/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + { + description: "link to a Docusaurus index page, no error", + sourceFile: `https://www.goteleport.com/docs/admin-guides/enroll-databases/`, + docsFilenames: []string{ + "docs/pages/admin-guides/enroll-databases/enroll-databases.mdx", + }, + redirects: RedirectConfig{}, + expected: []staleDocURL{}, + }, + // TODO: index page with redirect + // TODO: index page with no redirect + } + + for _, c := range cases { + fs := afero.NewMemMapFs() + for _, n := range c.docsFilenames { + _, err := fs.Create(path.Join("docs", n)) + assert.NoError(t, err) + } + + t.Run(c.description, func(t *testing.T) { + urls := staleDocsURLs(fs, c.redirects, strings.NewReader(c.sourceFile), "docs") + assert.Equal(t, c.expected, urls) + }) + } +} + +func TestString(t *testing.T) { + expected := `my/file/1.go: + - line 1: https://goteleport.com/docs/page1 + - line 10: https://goteleport.com/docs/page2 + - line 5: https://goteleport.com/docs/page3 + +my/file/2.go: + - line 304: https://goteleport.com/docs/page1 + - line 1003: https://goteleport.com/docs/page2 + +my/file/3.go: + - line 19: https://goteleport.com/docs/page1 + - line 253: https://goteleport.com/docs/page2 + +` + + data := staleDocsURLData{ + "my/file/1.go": []staleDocURL{ + {Line: 1, Text: "https://goteleport.com/docs/page1"}, + {Line: 10, Text: "https://goteleport.com/docs/page2"}, + {Line: 5, Text: "https://goteleport.com/docs/page3"}, + }, + + "my/file/2.go": []staleDocURL{ + {Line: 304, Text: "https://goteleport.com/docs/page1"}, + {Line: 1003, Text: "https://goteleport.com/docs/page2"}, + }, + + "my/file/3.go": []staleDocURL{ + {Line: 19, Text: "https://goteleport.com/docs/page1"}, + {Line: 253, Text: "https://goteleport.com/docs/page2"}, + }, + } + + assert.Equal(t, expected, data.String()) +} + +func Test_docURLPathToFilePath(t *testing.T) { + cases := []struct { + description string + input string + expectedShort string + expectedLong string + }{ + { + description: "redirect source", + input: "/admin-guides/databases/", + expectedShort: "docs/pages/admin-guides/databases.mdx", + expectedLong: "docs/pages/admin-guides/databases/databases.mdx", + }, + } + + for _, c := range cases { + t.Run(c.description, func(t *testing.T) { + short, long := docURLPathToFilePath(c.input) + assert.Equal(t, c.expectedShort, short) + assert.Equal(t, c.expectedLong, long) + }) + } +} diff --git a/bot/main.go b/bot/main.go index 7d97a884..58aff330 100644 --- a/bot/main.go +++ b/bot/main.go @@ -82,6 +82,8 @@ func main() { err = b.CheckChangelog(ctx) case "docpaths": err = b.CheckDocsPathsForMissingRedirects(ctx, flags.teleportClonePath) + case "docurls": + err = b.CheckDocsURLs(ctx, flags.teleportClonePath) default: err = trace.BadParameter("unknown workflow: %v", flags.workflow) }