// cmd/golangorg/server_test.go - website - Git at Google

 // Copyright 2013 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package main

 import (
 	"bytes"
 	"fmt"
 	"go/build"
 	"net/http/httptest"
 	"net/url"
 	"os"
 	pathpkg "path"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"testing"

 	"golang.org/x/net/html"
 	"golang.org/x/website/internal/webtest"
 )

 // TestWeb runs every webtest script matching testdata/*.txt against the
 // site handler. testdata/live.txt is always skipped, and
 // testdata/go1.19.txt is skipped unless haveRelease reports go1.19.
 func TestWeb(t *testing.T) {
 	h := NewHandler("../../_content", runtime.GOROOT())

 	scripts, err := filepath.Glob("testdata/*.txt")
 	if err != nil {
 		t.Fatal(err)
 	}
 	for _, script := range scripts {
 		// Compare with forward slashes so the checks also match on Windows.
 		name := filepath.ToSlash(script)
 		if name == "testdata/live.txt" {
 			continue
 		}
 		if name == "testdata/go1.19.txt" && !haveRelease("go1.19") {
 			continue
 		}
 		webtest.TestHandler(t, script, h)
 	}
 }

 // haveRelease reports whether release (for example "go1.19") is one of
 // the release tags in build.Default.ReleaseTags.
 func haveRelease(release string) bool {
 	tags := build.Default.ReleaseTags
 	for i := range tags {
 		if tags[i] == release {
 			return true
 		}
 	}
 	return false
 }

 // Substring tables consulted by findBad.
 var (
 	// bads lists substrings that mark a line as badly escaped HTML.
 	bads = []string{
 		// Over-escaped: an entity that was escaped a second time.
 		"&amp;lt;", "&amp;gt;", "&amp;amp;",
 		// Under-escaped: raw characters that should have been entities.
 		" < ", "<-", "& ",
 	}

 	// ignoreBads lists substrings that exempt a line from the bads check.
 	ignoreBads = []string{
 		// This JS appears on all the talks pages.
 		`window["location"] && window["location"]["hostname"] == "go.dev/talks"`,
 	}
 )

 // findBad returns the concatenation of the lines of body (newlines kept)
 // that contain one of the bads substrings, leaving out any line that
 // contains an ignoreBads substring. An empty result means that no badly
 // escaped HTML was found.
 func findBad(body string) string {
 	// containsAny reports whether line contains at least one of subs.
 	containsAny := func(line string, subs []string) bool {
 		for _, s := range subs {
 			if strings.Contains(line, s) {
 				return true
 			}
 		}
 		return false
 	}

 	var flagged strings.Builder
 	for _, line := range strings.SplitAfter(body, "\n") {
 		if containsAny(line, ignoreBads) {
 			continue
 		}
 		if containsAny(line, bads) {
 			flagged.WriteString(line)
 		}
 	}
 	return flagged.String()
 }

 // TestAll walks the files under ../../_content and, for every .md, .html,
 // .article or .slide file, fetches the URL the file is served at and checks
 // that the page loads, that findBad reports no badly escaped HTML, that it
 // parses as HTML, and that every <a href>, <img src> and <script src> link
 // on it is canonical (see fixURL) and, when it points at this server, loads.
 func TestAll(t *testing.T) {
 	const (
 		contentDir = "../../_content"
 		origin     = "https://go.dev"
 	)
 	h := NewHandler(contentDir, runtime.GOROOT())

 	// fetch serves target through the handler and returns the status code
 	// and body. Any status other than 200 or 300-309 is reported as an error.
 	fetch := func(target string) (int, string, error) {
 		if target == "https://go.dev/rebuild" {
 			// /rebuild reads from cloud storage so pretend it's fine.
 			return 200, "", nil
 		}
 		rec := httptest.NewRecorder()
 		rec.Body = new(bytes.Buffer)
 		h.ServeHTTP(rec, httptest.NewRequest("GET", target, nil))
 		status, text := rec.Code, rec.Body.String()
 		if status == 200 || status/10 == 30 {
 			return status, text, nil
 		}
 		return status, text, fmt.Errorf("GET %s: %d, want 200 or 30x", target, status)
 	}

 	// Links whose path starts with one of these are assumed to exist.
 	assumeOK := []string{"/issue/", "/pkg/", "/s/", "/wiki/", "/play/p/"}

 	// Source paths or path prefixes that are never processed.
 	skipSources := []string{
 		// Wiki is in a different repo; errors there should not block production push.
 		"/wiki/",

 		// Support files not meant to be served directly.
 		"/doc/articles/wiki/",
 		"/talks/2013/highperf/",
 		"/talks/2016/refactor/",
 		"/tour/static/partials/",
 	}

 	// File name suffixes that identify page sources.
 	pageExts := []string{".md", ".html", ".article", ".slide"}

 	// seen records every link already fetched, so the links shared by the
 	// page frame are fetched once instead of once per page.
 	seen := make(map[string]bool)

 	// checkLink validates the link carried by element n, if any.
 	// page is the URL of the page n was found on and base is its parsed form.
 	checkLink := func(page string, base *url.URL, n *html.Node) {
 		if n.Type != html.ElementNode {
 			return
 		}
 		var targ string
 		switch n.Data {
 		case "a":
 			targ = findAttr(n, "href")
 		case "img", "script":
 			targ = findAttr(n, "src")
 		}
 		// Ignore no target or #fragment.
 		if targ == "" || strings.HasPrefix(targ, "#") {
 			return
 		}

 		link, err := url.Parse(targ)
 		if err != nil {
 			t.Errorf("GET %s: found unparseable URL %s: %v", page, targ, err)
 			return
 		}

 		// Report links that are not written in canonical form.
 		if fix := fixURL(link); fix != "" {
 			t.Errorf("GET %s: found link to %s, should be %s", page, targ, fix)
 			return
 		}

 		// Pages on other servers are too expensive to check and would
 		// fail the test whenever those servers went down or moved.
 		if link.Scheme != "" || link.Host != "" {
 			return
 		}

 		// Skip paths that we cannot really check in tests,
 		// like the /s/ shortener or redirects to GitHub.
 		for _, p := range assumeOK {
 			if strings.HasPrefix(link.Path, p) {
 				return
 			}
 		}
 		if link.Path == "/doc/godebug" {
 			// Lives in GOROOT and does not exist in Go 1.20,
 			// so skip the check to avoid failing the test on Go 1.20.
 			return
 		}

 		// Drop the #fragment, resolve against the page URL, and fetch
 		// the result unless it has been fetched before.
 		link.Fragment = ""
 		link.RawFragment = ""
 		full := base.ResolveReference(link).String()
 		if seen[full] {
 			return
 		}
 		seen[full] = true
 		if _, _, err := fetch(full); err != nil {
 			t.Errorf("GET %s: found broken link to %s:\n%s", page, targ, err)
 		}
 	}

 	// walkNodes applies checkLink to n and all of its descendants,
 	// visiting children before the node itself.
 	var walkNodes func(page string, base *url.URL, n *html.Node)
 	walkNodes = func(page string, base *url.URL, n *html.Node) {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			walkNodes(page, base, c)
 		}
 		checkLink(page, base, n)
 	}

 	filepath.Walk(contentDir, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
 			t.Fatal(err)
 		}
 		path = filepath.ToSlash(path)
 		rel := strings.TrimPrefix(path, contentDir)
 		for _, p := range skipSources {
 			if strings.HasPrefix(rel, p) {
 				return nil
 			}
 		}

 		isPage := false
 		for _, ext := range pageExts {
 			if strings.HasSuffix(path, ext) {
 				isPage = true
 				break
 			}
 		}
 		if !isPage {
 			return nil
 		}

 		// Derive the served URL: the extension is dropped everywhere
 		// except under /talks/, and a trailing "index" is removed.
 		page := origin + rel
 		if !strings.Contains(path, "/talks/") {
 			page = strings.TrimSuffix(page, pathpkg.Ext(path))
 		}
 		if strings.HasSuffix(page, "/index") {
 			page = strings.TrimSuffix(page, "index")
 		}

 		// Check that page can be loaded.
 		_, body, err := fetch(page)
 		if err != nil {
 			t.Errorf("%v\n%s", err, body)
 			return nil
 		}

 		// Check for over- or under-escaped HTML.
 		if bad := findBad(body); bad != "" {
 			t.Errorf("GET %s: contains improperly escaped HTML\n%s", page, bad)
 			return nil
 		}

 		// Parse the page and check all of its links.
 		doc, err := html.Parse(strings.NewReader(body))
 		if err != nil {
 			t.Errorf("GET %s: parsing HTML: %v", page, err)
 			return nil
 		}
 		base, err := url.Parse(page)
 		if err != nil {
 			t.Fatalf("cannot parse site URL: %v", err)
 		}
 		walkNodes(page, base, doc)
 		return nil
 	})
 }

 // fixURL returns the canonical form in which the link u should be written
 // in site content, or the empty string if u is fine as it is.
 //
 // Links to golang.org (except paths under /x/) and to go.dev become
 // site-relative paths. Links to the blog, learn, play, tour and talks
 // subdomains become paths under /<subdomain>. GitHub links to golang/go
 // issues and wiki pages become /issue/ and /wiki/ paths.
 //
 // Every correction is site-relative, so a corrected link is itself
 // accepted by fixURL. u is never modified.
 func fixURL(u *url.URL) string {
 	// Work on a copy so that the caller's URL is left untouched.
 	fixed := *u
 	// Every correction is a path on this site, so drop scheme and host.
 	fixed.Scheme = ""
 	fixed.Host = ""

 	switch u.Host {
 	case "golang.org":
 		if strings.HasPrefix(u.Path, "/x/") {
 			return ""
 		}
 		fallthrough
 	case "go.dev":
 		if fixed.Path == "" {
 			fixed.Path = "/"
 		}
 		return fixed.String()
 	case "blog.golang.org",
 		"blog.go.dev",
 		"learn.golang.org",
 		"learn.go.dev",
 		"play.golang.org",
 		"play.go.dev",
 		"tour.golang.org",
 		"tour.go.dev",
 		"talks.golang.org",
 		"talks.go.dev":
 		// The first host label becomes the leading path element.
 		name, _, _ := strings.Cut(u.Host, ".")
 		fixed.Path = "/" + name + u.Path
 		return fixed.String()
 	case "github.com":
 		if strings.HasPrefix(u.Path, "/golang/go/issues/") {
 			fixed.Path = "/issue/" + strings.TrimPrefix(u.Path, "/golang/go/issues/")
 			return fixed.String()
 		}
 		if strings.HasPrefix(u.Path, "/golang/go/wiki/") {
 			fixed.Path = "/wiki/" + strings.TrimPrefix(u.Path, "/golang/go/wiki/")
 			return fixed.String()
 		}
 	}
 	return ""
 }

 // findAttr returns the value of the first attribute of n whose key is
 // name, or the empty string if n has no such attribute.
 func findAttr(n *html.Node, name string) string {
 	for i := range n.Attr {
 		if attr := &n.Attr[i]; attr.Key == name {
 			return attr.Val
 		}
 	}
 	return ""
 }
	// Copyright 2013 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package main

	import (
	"bytes"
	"fmt"
	"go/build"
	"net/http/httptest"
	"net/url"
	"os"
	pathpkg "path"
	"path/filepath"
	"runtime"
	"strings"
	"testing"

	"golang.org/x/net/html"
	"golang.org/x/website/internal/webtest"
	)

	// TestWeb runs every webtest script matching testdata/*.txt against the
	// site handler. testdata/live.txt is always skipped, and
	// testdata/go1.19.txt is skipped unless haveRelease reports go1.19.
	func TestWeb(t *testing.T) {
		h := NewHandler("../../_content", runtime.GOROOT())

		scripts, err := filepath.Glob("testdata/*.txt")
		if err != nil {
			t.Fatal(err)
		}
		for _, script := range scripts {
			// Compare with forward slashes so the checks also match on Windows.
			name := filepath.ToSlash(script)
			if name == "testdata/live.txt" {
				continue
			}
			if name == "testdata/go1.19.txt" && !haveRelease("go1.19") {
				continue
			}
			webtest.TestHandler(t, script, h)
		}
	}

	// haveRelease reports whether release (for example "go1.19") is one of
	// the release tags in build.Default.ReleaseTags.
	func haveRelease(release string) bool {
		tags := build.Default.ReleaseTags
		for i := range tags {
			if tags[i] == release {
				return true
			}
		}
		return false
	}

	// Substring tables consulted by findBad.
	var (
		// bads lists substrings that mark a line as badly escaped HTML.
		// The entity entries are deliberately double-escaped: a single
		// "&lt;" is correct HTML and must not be reported.
		bads = []string{
			// Over-escaped: an entity that was escaped a second time.
			"&amp;lt;",
			"&amp;gt;",
			"&amp;amp;",
			// Under-escaped: raw characters that should have been entities.
			" < ",
			"<-",
			"& ",
		}

		// ignoreBads lists substrings that exempt a line from the bads check.
		ignoreBads = []string{
			// This JS appears on all the talks pages.
			`window["location"] && window["location"]["hostname"] == "go.dev/talks"`,
		}
	)

	// findBad returns the concatenation of the lines of body (newlines kept)
	// that contain one of the bads substrings, leaving out any line that
	// contains an ignoreBads substring. An empty result means that no badly
	// escaped HTML was found.
	func findBad(body string) string {
		// containsAny reports whether line contains at least one of subs.
		containsAny := func(line string, subs []string) bool {
			for _, s := range subs {
				if strings.Contains(line, s) {
					return true
				}
			}
			return false
		}

		var flagged strings.Builder
		for _, line := range strings.SplitAfter(body, "\n") {
			if containsAny(line, ignoreBads) {
				continue
			}
			if containsAny(line, bads) {
				flagged.WriteString(line)
			}
		}
		return flagged.String()
	}

	func TestAll(t *testing.T) {
	h := NewHandler("../../_content", runtime.GOROOT())

	get := func(url string) (code int, body string, err error) {
	if url == "https://go.dev/rebuild" {
	// /rebuild reads from cloud storage so pretend it's fine.
	return 200, "", nil
	}
	rec := httptest.NewRecorder()
	rec.Body = new(bytes.Buffer)
	h.ServeHTTP(rec, httptest.NewRequest("GET", url, nil))
	if rec.Code != 200 && rec.Code/10 != 30 {
	return rec.Code, rec.Body.String(), fmt.Errorf("GET %s: %d, want 200 or 30x", url, rec.Code)
	}
	return rec.Code, rec.Body.String(), nil
	}

	// Assume any URL with these prefixes exists.
	skips := []string{
	"/issue/",
	"/pkg/",
	"/s/",
	"/wiki/",
	"/play/p/",
	}

	// Do not process these paths or path prefixes.
	ignores := []string{
	// Wiki is in a different repo; errors there should not block production push.
	"/wiki/",

	// Support files not meant to be served directly.
	"/doc/articles/wiki/",
	"/talks/2013/highperf/",
	"/talks/2016/refactor/",
	"/tour/static/partials/",
	}

	// Only check and report a URL the first time we see it.
	// Otherwise we recheck all the URLs in the page frames for every page.
	checked := make(map[string]bool)

	testTree := func(dir, prefix string) {
	filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
	if err != nil {
	t.Fatal(err)
	}
	path = filepath.ToSlash(path)
	siteURL := strings.TrimPrefix(path, dir)
	for _, ig := range ignores {
	if strings.HasPrefix(siteURL, ig) {
	return nil
	}
	}
	siteURL = prefix + siteURL // add https://go.dev/

	if strings.HasSuffix(path, ".md") \|\|
	strings.HasSuffix(path, ".html") \|\|
	strings.HasSuffix(path, ".article") \|\|
	strings.HasSuffix(path, ".slide") {
	if !strings.Contains(path, "/talks/") {
	siteURL = strings.TrimSuffix(siteURL, pathpkg.Ext(path))
	}
	if strings.HasSuffix(siteURL, "/index") {
	siteURL = strings.TrimSuffix(siteURL, "index")
	}

	// Check that page can be loaded.
	_, body, err := get(siteURL)
	if err != nil {
	t.Errorf("%v\n%s", err, body)
	return nil
	}

	// Check that page is valid HTML.
	// First check for over- or under-escaped HTML.
	bad := findBad(body)
	if bad != "" {
	t.Errorf("GET %s: contains improperly escaped HTML\n%s", siteURL, bad)
	return nil
	}

	// Now check all the links to other pages on this server.
	// (Pages on other servers are too expensive to check
	// and would cause test failures if servers went down
	// or moved their contents.)
	doc, err := html.Parse(strings.NewReader(body))
	if err != nil {
	t.Errorf("GET %s: parsing HTML: %v", siteURL, err)
	return nil
	}

	base, err := url.Parse(siteURL)
	if err != nil {
	t.Fatalf("cannot parse site URL: %v", err)
	}

	// Walk HTML looking for <a href=...>, <img src=...>, and <script src=...>.
	var checkLinks func(*html.Node)
	checkLinks = func(n *html.Node) {
	for c := n.FirstChild; c != nil; c = c.NextSibling {
	checkLinks(c)
	}
	var targ string
	if n.Type == html.ElementNode {
	switch n.Data {
	case "a":
	targ = findAttr(n, "href")
	case "img", "script":
	targ = findAttr(n, "src")
	}
	}
	// Ignore no target or #fragment.
	if targ == "" \|\| strings.HasPrefix(targ, "#") {
	return
	}

	// Parse target as URL.
	u, err := url.Parse(targ)
	if err != nil {
	t.Errorf("GET %s: found unparseable URL %s: %v", siteURL, targ, err)
	return
	}

	// Check whether URL is canonicalized properly.
	if fix := fixURL(u); fix != "" {
	t.Errorf("GET %s: found link to %s, should be %s", siteURL, targ, fix)
	return
	}

	// Skip checking URLs on other servers.
	if u.Scheme != "" \|\| u.Host != "" {
	return
	}

	// Skip paths that we cannot really check in tests,
	// like the /s/ shortener or redirects to GitHub.
	for _, skip := range skips {
	if strings.HasPrefix(u.Path, skip) {
	return
	}
	}
	if u.Path == "/doc/godebug" {
	// Lives in GOROOT and does not exist in Go 1.20,
	// so skip the check to avoid failing the test on Go 1.20.
	return
	}

	// Clear #fragment and build up fully qualified https://go.dev/ URL and check.
	// Only check each link one time during this test,
	// or else we re-check all the frame links on every page.
	u.Fragment = ""
	u.RawFragment = ""
	full := base.ResolveReference(u).String()
	if checked[full] {
	return
	}
	checked[full] = true
	if _, _, err := get(full); err != nil {
	t.Errorf("GET %s: found broken link to %s:\n%s", siteURL, targ, err)
	}
	}
	checkLinks(doc)
	}
	return nil
	})
	}

	testTree("../../_content", "https://go.dev")
	}

	// fixURL returns the canonical form in which the link u should be written
	// in site content, or the empty string if u is fine as it is.
	//
	// Links to golang.org (except paths under /x/) and to go.dev become
	// site-relative paths. Links to the blog, learn, play, tour and talks
	// subdomains become paths under /<subdomain>. GitHub links to golang/go
	// issues and wiki pages become /issue/ and /wiki/ paths.
	//
	// Every correction is site-relative, so a corrected link is itself
	// accepted by fixURL. u is never modified.
	func fixURL(u *url.URL) string {
		// Work on a copy so that the caller's URL is left untouched.
		fixed := *u
		// Every correction is a path on this site, so drop scheme and host.
		fixed.Scheme = ""
		fixed.Host = ""

		switch u.Host {
		case "golang.org":
			if strings.HasPrefix(u.Path, "/x/") {
				return ""
			}
			fallthrough
		case "go.dev":
			if fixed.Path == "" {
				fixed.Path = "/"
			}
			return fixed.String()
		case "blog.golang.org",
			"blog.go.dev",
			"learn.golang.org",
			"learn.go.dev",
			"play.golang.org",
			"play.go.dev",
			"tour.golang.org",
			"tour.go.dev",
			"talks.golang.org",
			"talks.go.dev":
			// The first host label becomes the leading path element.
			name, _, _ := strings.Cut(u.Host, ".")
			fixed.Path = "/" + name + u.Path
			return fixed.String()
		case "github.com":
			if strings.HasPrefix(u.Path, "/golang/go/issues/") {
				fixed.Path = "/issue/" + strings.TrimPrefix(u.Path, "/golang/go/issues/")
				return fixed.String()
			}
			if strings.HasPrefix(u.Path, "/golang/go/wiki/") {
				fixed.Path = "/wiki/" + strings.TrimPrefix(u.Path, "/golang/go/wiki/")
				return fixed.String()
			}
		}
		return ""
	}

	// findAttr returns the value of the first attribute of n whose key is
	// name, or the empty string if n has no such attribute.
	func findAttr(n *html.Node, name string) string {
		for i := range n.Attr {
			if attr := &n.Attr[i]; attr.Key == name {
				return attr.Val
			}
		}
		return ""
	}