| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| //go:build ignore |
| |
| package main |
| |
| import ( |
| "bytes" |
| _ "embed" |
| "fmt" |
| "go/format" |
| "io" |
| "log" |
| "maps" |
| "os" |
| "slices" |
| "strconv" |
| "strings" |
| ) |
| |
// We embed this source file in the resulting code-generation program in order
// to extract the definitions of the encoding type and constants from it and
// include them in the generated file (see generateEnc).
//
//go:embed gen_encoding_table.go
var genSource string

// filename is the name of the file that main writes the formatted output to.
const filename = "encoding_table.go"
| |
| func main() { |
| var out bytes.Buffer |
| fmt.Fprintln(&out, "// Code generated from gen_encoding_table.go using 'go generate'; DO NOT EDIT.") |
| fmt.Fprintln(&out) |
| fmt.Fprintln(&out, "// Copyright 2025 The Go Authors. All rights reserved.") |
| fmt.Fprintln(&out, "// Use of this source code is governed by a BSD-style") |
| fmt.Fprintln(&out, "// license that can be found in the LICENSE file.") |
| fmt.Fprintln(&out) |
| fmt.Fprintln(&out, "package url") |
| fmt.Fprintln(&out) |
| generateEnc(&out, genSource) |
| generateTable(&out) |
| |
| formatted, err := format.Source(out.Bytes()) |
| if err != nil { |
| log.Fatal("format:", err) |
| } |
| |
| err = os.WriteFile(filename, formatted, 0644) |
| if err != nil { |
| log.Fatal("WriteFile:", err) |
| } |
| } |
| |
| func generateEnc(w io.Writer, src string) { |
| var writeLine bool |
| for line := range strings.Lines(src) { |
| if strings.HasPrefix(line, "// START encoding") { |
| writeLine = true |
| continue |
| } |
| if strings.HasPrefix(line, "// END encoding") { |
| return |
| } |
| if writeLine { |
| fmt.Fprint(w, line) |
| } |
| } |
| } |
| |
| func generateTable(w io.Writer) { |
| fmt.Fprintln(w, "var table = [256]encoding{") |
| |
| // Sort the encodings (in decreasing order) to guarantee a stable output. |
| sortedEncs := slices.Sorted(maps.Keys(encNames)) |
| slices.Reverse(sortedEncs) |
| |
| for i := range 256 { |
| c := byte(i) |
| var lineBuf bytes.Buffer |
| |
| // Write key to line buffer. |
| lineBuf.WriteString(strconv.QuoteRune(rune(c))) |
| |
| lineBuf.WriteByte(':') |
| |
| // Write value to line buffer. |
| blankVal := true |
| if ishex(c) { |
| // Set the hexChar bit if this char is hexadecimal. |
| lineBuf.WriteString("hexChar") |
| blankVal = false |
| } |
| for _, enc := range sortedEncs { |
| if !shouldEscape(c, enc) { |
| if !blankVal { |
| lineBuf.WriteByte('|') |
| } |
| // Set this encoding mode's bit if this char should NOT be |
| // escaped. |
| name := encNames[enc] |
| lineBuf.WriteString(name) |
| blankVal = false |
| } |
| } |
| |
| if !blankVal { |
| lineBuf.WriteString(",\n") |
| w.Write(lineBuf.Bytes()) |
| } |
| } |
| fmt.Fprintln(w, "}") |
| } |
| |
// The declarations between the two marker comments below are copied verbatim
// into the generated file by generateEnc, which reads this program's own
// embedded source. The markers are recognized by their leading words only, so
// the parenthesized notes can be reworded, but anything placed between the
// markers ends up in the generated output.

// START encoding (keep this marker comment in sync with generateEnc)
type encoding uint8

const (
	encodePath encoding = 1 << iota
	encodePathSegment
	encodeHost
	encodeZone
	encodeUserPassword
	encodeQueryComponent
	encodeFragment

	// hexChar is actually NOT an encoding mode, but there are only seven
	// encoding modes. We might as well abuse the otherwise unused most
	// significant bit in uint8 to indicate whether a character is
	// hexadecimal.
	hexChar
)

// END encoding (keep this marker comment in sync with generateEnc)
| |
| // Keep this in sync with the definitions of encoding mode constants. |
| var encNames = map[encoding]string{ |
| encodePath: "encodePath", |
| encodePathSegment: "encodePathSegment", |
| encodeHost: "encodeHost", |
| encodeZone: "encodeZone", |
| encodeUserPassword: "encodeUserPassword", |
| encodeQueryComponent: "encodeQueryComponent", |
| encodeFragment: "encodeFragment", |
| } |
| |
| // Return true if the specified character should be escaped when |
| // appearing in a URL string, according to RFC 3986. |
| // |
| // Please be informed that for now shouldEscape does not check all |
| // reserved characters correctly. See golang.org/issue/5684. |
| func shouldEscape(c byte, mode encoding) bool { |
| // §2.3 Unreserved characters (alphanum) |
| if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { |
| return false |
| } |
| |
| if mode == encodeHost || mode == encodeZone { |
| // §3.2.2 Host allows |
| // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" |
| // as part of reg-name. |
| // We add : because we include :port as part of host. |
| // We add [ ] because we include [ipv6]:port as part of host. |
| // We add < > because they're the only characters left that |
| // we could possibly allow, and Parse will reject them if we |
| // escape them (because hosts can't use %-encoding for |
| // ASCII bytes). |
| switch c { |
| case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': |
| return false |
| } |
| } |
| |
| switch c { |
| case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) |
| return false |
| |
| case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) |
| // Different sections of the URL allow a few of |
| // the reserved characters to appear unescaped. |
| switch mode { |
| case encodePath: // §3.3 |
| // The RFC allows : @ & = + $ but saves / ; , for assigning |
| // meaning to individual path segments. This package |
| // only manipulates the path as a whole, so we allow those |
| // last three as well. That leaves only ? to escape. |
| return c == '?' |
| |
| case encodePathSegment: // §3.3 |
| // The RFC allows : @ & = + $ but saves / ; , for assigning |
| // meaning to individual path segments. |
| return c == '/' || c == ';' || c == ',' || c == '?' |
| |
| case encodeUserPassword: // §3.2.1 |
| // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in |
| // userinfo, so we must escape only '@', '/', and '?'. |
| // The parsing of userinfo treats ':' as special so we must escape |
| // that too. |
| return c == '@' || c == '/' || c == '?' || c == ':' |
| |
| case encodeQueryComponent: // §3.4 |
| // The RFC reserves (so we must escape) everything. |
| return true |
| |
| case encodeFragment: // §4.1 |
| // The RFC text is silent but the grammar allows |
| // everything, so escape nothing. |
| return false |
| } |
| } |
| |
| if mode == encodeFragment { |
| // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are |
| // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not |
| // need to be escaped. To minimize potential breakage, we apply two restrictions: |
| // (1) we always escape sub-delims outside of the fragment, and (2) we always |
| // escape single quote to avoid breaking callers that had previously assumed that |
| // single quotes would be escaped. See issue #19917. |
| switch c { |
| case '!', '(', ')', '*': |
| return false |
| } |
| } |
| |
| // Everything else must be escaped. |
| return true |
| } |
| |
// ishex reports whether c is an ASCII hexadecimal digit: '0'-'9', 'a'-'f',
// or 'A'-'F'.
func ishex(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f':
		return true
	case 'A' <= c && c <= 'F':
		return true
	}
	return false
}