Skip to content

Commit

Permalink
Add a workflow to check docs URLs
Browse files Browse the repository at this point in the history
Like the workflow to check for necessary redirects to match renamed or
deleted docs pages, this workflow takes the path to a
`gravitational/teleport` clone. It finds mentions of Teleport docs site
paths in each file changed by a PR and ensures that each path has a
corresponding docs page or redirect source.
  • Loading branch information
ptgott committed Dec 24, 2024
1 parent 5b46fbd commit 929c451
Show file tree
Hide file tree
Showing 5 changed files with 369 additions and 6 deletions.
7 changes: 5 additions & 2 deletions bot/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ go 1.23
require (
github.com/google/go-github/v37 v37.0.0
github.com/gravitational/trace v1.3.1
github.com/spf13/afero v1.11.0
github.com/stretchr/testify v1.8.4
golang.org/x/oauth2 v0.13.0
golang.org/x/oauth2 v0.15.0
)

require golang.org/x/text v0.14.0 // indirect

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/golang/protobuf v1.5.3 // indirect
Expand All @@ -17,7 +20,7 @@ require (
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/net v0.17.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/sys v0.15.0 // indirect
golang.org/x/term v0.15.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
Expand Down
12 changes: 8 additions & 4 deletions bot/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=
github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
Expand Down Expand Up @@ -99,12 +101,12 @@ golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c=
golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY=
golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0=
golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ=
golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down Expand Up @@ -137,6 +139,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
Expand Down
158 changes: 158 additions & 0 deletions bot/internal/bot/doclinks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package bot

import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path"
"regexp"
"strings"
"text/template"

"github.com/gravitational/trace"
"github.com/spf13/afero"
)

type staleDocURL struct {
Text string
Line int
}

// CheckDocsURLs opens each file changed by a pull request and checks whether
// any URL paths for the Teleport documentation site within the file are
// incorrect. An incorrect URL path is one that does not correspond to either:
// - a docs page in the docs/pages directory within teleportClonePath
// - the source of a redirect named in the docs configuration file at
// docs/config.json within teleportClonePath
func (b *Bot) CheckDocsURLs(ctx context.Context, teleportClonePath string) error {
if teleportClonePath == "" {
return trace.Wrap(errors.New("unable to load Teleport documentation config with an empty path"))
}

docsConfigPath := path.Join(teleportClonePath, "docs", "config.json")
f, err := os.Open(docsConfigPath)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to open docs config file at %v: %v", docsConfigPath, err))
}
defer f.Close()

var c DocsConfig
if err := json.NewDecoder(f).Decode(&c); err != nil {
return trace.Wrap(fmt.Errorf("unable to load redirect configuration from %v: %v", docsConfigPath, err))
}

files, err := b.c.GitHub.ListFiles(ctx, b.c.Environment.Organization, b.c.Environment.Repository, b.c.Environment.Number)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to fetch files for PR %v: %v", b.c.Environment.Number, err))
}

data := make(staleDocsURLData)
for _, f := range files {
l, err := os.Open(f.Name)
if err != nil {
return trace.Wrap(fmt.Errorf("unable to open PR file %v: %v", f.Name, err))
}
defer l.Close()

stale := staleDocsURLs(afero.NewOsFs(), c.Redirects, l, teleportClonePath)
if len(stale) == 0 {
continue
}
data[f.Name] = stale
}

if len(data) != 0 {
return trace.Wrap(fmt.Errorf("found incorrect documentation URLs in the following files:\n%v", data))
}

return nil
}

type staleDocsURLData map[string][]staleDocURL

const staleURLDataTempl = `{{ range $key, $val := . }}{{ $key }}:
{{ range $val }} - line {{ .Line }}: {{ .Text }}
{{ end }}
{{ end }}`

func (s staleDocsURLData) String() string {
var buf bytes.Buffer
template.Must(template.New("").Parse(staleURLDataTempl)).Execute(&buf, s)
return buf.String()
}

var docURLRegexp = regexp.MustCompile(`goteleport.com(/([a-zA-Z0-9\._-]+/?)+)`)

// docURLPathToFilePath converts urlpath, which is a valid redirect source, to
// two possible docs page file paths that would correspond to urlpath, given
// Docusaurus' logic for generating routes for category index pages.
//
// For example, "/admin-guides/database-access/" would map to either
// "docs/pages/admin-guides/database-access/database-access.mdx" or
// "docs/pages/admin-guides/database-access.mdx", depending on whether
// database-access.mdx is a category index page.
//
// In the return value, standard page path comes first, followed by the category
// index page path.
func docURLPathToFilePath(urlpath string) (string, string) {
segs := strings.Split(strings.Trim(urlpath, "/"), "/")
short := docsPrefix + strings.Join(segs, "/") + ".mdx"
if len(segs) == 0 {
return short, short
}
long := docsPrefix + strings.Join(append(segs, segs[len(segs)-1]), "/") + ".mdx"
return short, long
}

// staleDocsURLs examines file to detect stale docs site URLs. A style docs site
// URL is a URL of a docs page that neither corresponds to a file in fs nor a
// redirect source in conf.
//
// staleDocsURLs assumes that docs paths are in fs and relative to
// teleportClonePath.
func staleDocsURLs(fs afero.Fs, conf RedirectConfig, file io.Reader, teleportClonePath string) []staleDocURL {
sources := make(map[string]struct{})
for _, s := range conf {
sources[s.Source] = struct{}{}
}

res := []staleDocURL{}
sc := bufio.NewScanner(file)
sc.Split(bufio.ScanLines)
var line int
for sc.Scan() {
line++
m := docURLRegexp.FindAllStringSubmatch(sc.Text(), -1)
for _, a := range m {
urlpath := a[1]
// Redirect sources include a trailing "/".
if !strings.HasSuffix(urlpath, "/") {
urlpath += "/"
}

// While goteleport.com/docs URLs include the /docs/
// path segment, redirect sources do not.
urlpath = strings.TrimPrefix(urlpath, "/docs")

_, hasRedirect := sources[urlpath]
short, long := docURLPathToFilePath(urlpath)
_, err1 := fs.Stat(path.Join(teleportClonePath, short))
_, err2 := fs.Stat(path.Join(teleportClonePath, long))
if hasRedirect || err1 == nil || err2 == nil {
continue
}

res = append(res, staleDocURL{
Text: a[0],
Line: line,
})
}
}

return res
}
Loading

0 comments on commit 929c451

Please sign in to comment.