| // Copyright 2013 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "bytes" |
| "fmt" |
| "go/build" |
| "net/http/httptest" |
| "net/url" |
| "os" |
| pathpkg "path" |
| "path/filepath" |
| "runtime" |
| "strings" |
| "testing" |
| |
| "golang.org/x/net/html" |
| "golang.org/x/website/internal/webtest" |
| ) |
| |
| func TestWeb(t *testing.T) { |
| h := NewHandler("../../_content", runtime.GOROOT()) |
| |
| files, err := filepath.Glob("testdata/*.txt") |
| if err != nil { |
| t.Fatal(err) |
| } |
| for _, file := range files { |
| switch filepath.ToSlash(file) { |
| case "testdata/live.txt": |
| continue |
| case "testdata/go1.19.txt": |
| if !haveRelease("go1.19") { |
| continue |
| } |
| } |
| webtest.TestHandler(t, file, h) |
| } |
| } |
| |
| func haveRelease(release string) bool { |
| for _, tag := range build.Default.ReleaseTags { |
| if tag == release { |
| return true |
| } |
| } |
| return false |
| } |
| |
| var bads = []string{ |
| "<", |
| ">", |
| "&", |
| " < ", |
| "<-", |
| "& ", |
| } |
| |
| var ignoreBads = []string{ |
| // This JS appears on all the talks pages. |
| `window["location"] && window["location"]["hostname"] == "go.dev/talks"`, |
| } |
| |
| // findBad returns (only) the lines containing badly escaped HTML in body. |
| // If findBad returns the empty string, there is no badly escaped HTML. |
| func findBad(body string) string { |
| lines := strings.SplitAfter(body, "\n") |
| var out []string |
| Lines: |
| for _, line := range lines { |
| for _, ig := range ignoreBads { |
| if strings.Contains(line, ig) { |
| continue Lines |
| } |
| } |
| for _, b := range bads { |
| if strings.Contains(line, b) { |
| out = append(out, line) |
| break |
| } |
| } |
| } |
| return strings.Join(out, "") |
| } |
| |
| func TestAll(t *testing.T) { |
| h := NewHandler("../../_content", runtime.GOROOT()) |
| |
| get := func(url string) (code int, body string, err error) { |
| if url == "https://go.dev/rebuild" { |
| // /rebuild reads from cloud storage so pretend it's fine. |
| return 200, "", nil |
| } |
| rec := httptest.NewRecorder() |
| rec.Body = new(bytes.Buffer) |
| h.ServeHTTP(rec, httptest.NewRequest("GET", url, nil)) |
| if rec.Code != 200 && rec.Code/10 != 30 { |
| return rec.Code, rec.Body.String(), fmt.Errorf("GET %s: %d, want 200 or 30x", url, rec.Code) |
| } |
| return rec.Code, rec.Body.String(), nil |
| } |
| |
| // Assume any URL with these prefixes exists. |
| skips := []string{ |
| "/issue/", |
| "/pkg/", |
| "/s/", |
| "/wiki/", |
| "/play/p/", |
| } |
| |
| // Do not process these paths or path prefixes. |
| ignores := []string{ |
| // Wiki is in a different repo; errors there should not block production push. |
| "/wiki/", |
| |
| // Support files not meant to be served directly. |
| "/doc/articles/wiki/", |
| "/talks/2013/highperf/", |
| "/talks/2016/refactor/", |
| "/tour/static/partials/", |
| } |
| |
| // Only check and report a URL the first time we see it. |
| // Otherwise we recheck all the URLs in the page frames for every page. |
| checked := make(map[string]bool) |
| |
| testTree := func(dir, prefix string) { |
| filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { |
| if err != nil { |
| t.Fatal(err) |
| } |
| path = filepath.ToSlash(path) |
| siteURL := strings.TrimPrefix(path, dir) |
| for _, ig := range ignores { |
| if strings.HasPrefix(siteURL, ig) { |
| return nil |
| } |
| } |
| siteURL = prefix + siteURL // add https://go.dev/ |
| |
| if strings.HasSuffix(path, ".md") || |
| strings.HasSuffix(path, ".html") || |
| strings.HasSuffix(path, ".article") || |
| strings.HasSuffix(path, ".slide") { |
| if !strings.Contains(path, "/talks/") { |
| siteURL = strings.TrimSuffix(siteURL, pathpkg.Ext(path)) |
| } |
| if strings.HasSuffix(siteURL, "/index") { |
| siteURL = strings.TrimSuffix(siteURL, "index") |
| } |
| |
| // Check that page can be loaded. |
| _, body, err := get(siteURL) |
| if err != nil { |
| t.Errorf("%v\n%s", err, body) |
| return nil |
| } |
| |
| // Check that page is valid HTML. |
| // First check for over- or under-escaped HTML. |
| bad := findBad(body) |
| if bad != "" { |
| t.Errorf("GET %s: contains improperly escaped HTML\n%s", siteURL, bad) |
| return nil |
| } |
| |
| // Now check all the links to other pages on this server. |
| // (Pages on other servers are too expensive to check |
| // and would cause test failures if servers went down |
| // or moved their contents.) |
| doc, err := html.Parse(strings.NewReader(body)) |
| if err != nil { |
| t.Errorf("GET %s: parsing HTML: %v", siteURL, err) |
| return nil |
| } |
| |
| base, err := url.Parse(siteURL) |
| if err != nil { |
| t.Fatalf("cannot parse site URL: %v", err) |
| } |
| |
| // Walk HTML looking for <a href=...>, <img src=...>, and <script src=...>. |
| var checkLinks func(*html.Node) |
| checkLinks = func(n *html.Node) { |
| for c := n.FirstChild; c != nil; c = c.NextSibling { |
| checkLinks(c) |
| } |
| var targ string |
| if n.Type == html.ElementNode { |
| switch n.Data { |
| case "a": |
| targ = findAttr(n, "href") |
| case "img", "script": |
| targ = findAttr(n, "src") |
| } |
| } |
| // Ignore no target or #fragment. |
| if targ == "" || strings.HasPrefix(targ, "#") { |
| return |
| } |
| |
| // Parse target as URL. |
| u, err := url.Parse(targ) |
| if err != nil { |
| t.Errorf("GET %s: found unparseable URL %s: %v", siteURL, targ, err) |
| return |
| } |
| |
| // Check whether URL is canonicalized properly. |
| if fix := fixURL(u); fix != "" { |
| t.Errorf("GET %s: found link to %s, should be %s", siteURL, targ, fix) |
| return |
| } |
| |
| // Skip checking URLs on other servers. |
| if u.Scheme != "" || u.Host != "" { |
| return |
| } |
| |
| // Skip paths that we cannot really check in tests, |
| // like the /s/ shortener or redirects to GitHub. |
| for _, skip := range skips { |
| if strings.HasPrefix(u.Path, skip) { |
| return |
| } |
| } |
| if u.Path == "/doc/godebug" { |
| // Lives in GOROOT and does not exist in Go 1.20, |
| // so skip the check to avoid failing the test on Go 1.20. |
| return |
| } |
| |
| // Clear #fragment and build up fully qualified https://go.dev/ URL and check. |
| // Only check each link one time during this test, |
| // or else we re-check all the frame links on every page. |
| u.Fragment = "" |
| u.RawFragment = "" |
| full := base.ResolveReference(u).String() |
| if checked[full] { |
| return |
| } |
| checked[full] = true |
| if _, _, err := get(full); err != nil { |
| t.Errorf("GET %s: found broken link to %s:\n%s", siteURL, targ, err) |
| } |
| } |
| checkLinks(doc) |
| } |
| return nil |
| }) |
| } |
| |
| testTree("../../_content", "https://go.dev") |
| } |
| |
| // fixURL returns the corrected URL for u, |
| // or the empty string if u is fine. |
| func fixURL(u *url.URL) string { |
| switch u.Host { |
| case "golang.org": |
| if strings.HasPrefix(u.Path, "/x/") { |
| return "" |
| } |
| fallthrough |
| case "go.dev": |
| u.Host = "" |
| u.Scheme = "" |
| if u.Path == "" { |
| u.Path = "/" |
| } |
| return u.String() |
| case "blog.golang.org", |
| "blog.go.dev", |
| "learn.golang.org", |
| "learn.go.dev", |
| "play.golang.org", |
| "play.go.dev", |
| "tour.golang.org", |
| "tour.go.dev", |
| "talks.golang.org", |
| "talks.go.dev": |
| name, _, _ := strings.Cut(u.Host, ".") |
| u.Host = "" |
| u.Scheme = "" |
| u.Path = "/" + name + u.Path |
| return u.String() |
| case "github.com": |
| if strings.HasPrefix(u.Path, "/golang/go/issues/") { |
| u.Host = "go.dev" |
| u.Path = "/issue/" + strings.TrimPrefix(u.Path, "/golang/go/issues/") |
| return u.String() |
| } |
| if strings.HasPrefix(u.Path, "/golang/go/wiki/") { |
| u.Host = "go.dev" |
| u.Path = "/wiki/" + strings.TrimPrefix(u.Path, "/golang/go/wiki/") |
| return u.String() |
| } |
| } |
| return "" |
| } |
| |
| // findAttr returns the value for n's attribute with the given name. |
| func findAttr(n *html.Node, name string) string { |
| for _, a := range n.Attr { |
| if a.Key == name { |
| return a.Val |
| } |
| } |
| return "" |
| } |