publicsuffix/gen.go - net - Git at Google

 // Copyright 2012 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // +build ignore

 package main

 // This program generates table.go and table_test.go.
 // Invoke as:
 //
 //	go run gen.go -version "xxx"       >table.go
 //	go run gen.go -version "xxx" -test >table_test.go
 //
 // The version is derived from information found at
 // http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat
 // which is linked from http://publicsuffix.org/list/.
 //
 // To fetch a particular hg revision, such as 05b11a8d1ace, pass
 // -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"

 import (
 	"bufio"
 	"bytes"
 	"flag"
 	"fmt"
 	"go/format"
 	"io"
 	"net/http"
 	"os"
 	"sort"
 	"strings"

 	"code.google.com/p/go.net/idna"
 )

 // Node types. The same numeric values are written into the generated table.go
 // by printReal, and are packed into the top two bits of each encoded node by
 // printNode.
 const (
 	nodeTypeNormal     = 0 // rendered as "+" by nodeTypeString
 	nodeTypeException  = 1 // rendered as "!"; used for rules that start with "!"
 	nodeTypeParentOnly = 2 // rendered as "o"; the type every node starts with in child
 )

 // nodeTypeString returns the one-character marker used for a node type in the
 // comments of the generated table: "+" for normal, "!" for exception and "o"
 // for parent-only. Any other value is a programming error and panics.
 func nodeTypeString(n int) string {
 	if n == nodeTypeNormal {
 		return "+"
 	}
 	if n == nodeTypeException {
 		return "!"
 	}
 	if n == nodeTypeParentOnly {
 		return "o"
 	}
 	panic("unreachable")
 }

 var (
 	// labelEncoding maps a label to its packed (text offset << 8 | length)
 	// value. It is filled in by printReal and read by printNode.
 	labelEncoding = map[string]uint32{}
 	// labelsList is the sorted list of distinct labels, rebuilt by main1 from
 	// labelsMap once all rules have been read.
 	labelsList    = []string{}
 	// labelsMap is the set of every label seen while reading the rules.
 	labelsMap     = map[string]bool{}
 	// rules holds every accepted rule, in input order, after IDNA conversion.
 	rules         = []string{}

 	// Command-line flags.
 	crush  = flag.Bool("crush", true, "make the generated node text as small as possible")
 	subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
 	url    = flag.String("url",
 		"http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1",
 		"URL of the publicsuffix.org list. If empty, stdin is read instead")
 	v       = flag.Bool("v", false, "verbose output (to stderr)")
 	version = flag.String("version", "", "the effective_tld_names.dat version")
 	test    = flag.Bool("test", false, "generate table_test.go")
 )

 // main runs main1 and, if it fails, reports the error on stderr and exits
 // with status 1.
 func main() {
 	err := main1()
 	if err == nil {
 		return
 	}
 	fmt.Fprintln(os.Stderr, err)
 	os.Exit(1)
 }

 // main1 does the real work of the program and returns any error instead of
 // exiting, so that main has a single place to report failures.
 //
 // It parses the flags, reads the public suffix list (from -url, or from stdin
 // when -url is empty), builds the tree of nodes below root, collects the
 // sorted list of labels, and writes the formatted table.go (or table_test.go
 // with -test) to stdout.
 func main1() error {
 	flag.Parse()
 	if *version == "" {
 		return fmt.Errorf("-version was not specified")
 	}
 	var r io.Reader = os.Stdin
 	if *url != "" {
 		res, err := http.Get(*url)
 		if err != nil {
 			return err
 		}
 		// Close the body on every path, including the bad-status return below.
 		defer res.Body.Close()
 		if res.StatusCode != http.StatusOK {
 			// res.Status is a string such as "404 Not Found", so it needs %s.
 			return fmt.Errorf("bad GET status for %s: %s", *url, res.Status)
 		}
 		r = res.Body
 	}

 	var root node
 	buf := new(bytes.Buffer)
 	br := bufio.NewReader(r)
 	for eof := false; !eof; {
 		s, err := br.ReadString('\n')
 		if err == io.EOF {
 			// ReadString still returns the data read before EOF, so a final
 			// line without a trailing newline must be processed too.
 			eof = true
 		} else if err != nil {
 			return err
 		}
 		s = strings.TrimSpace(s)
 		if s == "" || strings.HasPrefix(s, "//") {
 			continue
 		}
 		s, err = idna.ToASCII(s)
 		if err != nil {
 			return err
 		}

 		if *subset {
 			// Keep only a handful of suffixes; everything else is skipped.
 			switch {
 			case s == "ao" || strings.HasSuffix(s, ".ao"):
 			case s == "ar" || strings.HasSuffix(s, ".ar"):
 			case s == "arpa" || strings.HasSuffix(s, ".arpa"):
 			case s == "jp":
 			case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
 			case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
 			case s == "uk" || strings.HasSuffix(s, ".uk"):
 			case s == "tw" || strings.HasSuffix(s, ".tw"):
 			case s == "zw" || strings.HasSuffix(s, ".zw"):
 			case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
 				// xn--p1ai is Russian-Cyrillic "рф".
 			default:
 				continue
 			}
 		}

 		rules = append(rules, s)

 		// Strip the "*." or "!" marker and remember what it meant.
 		nt, wildcard := nodeTypeNormal, false
 		switch {
 		case strings.HasPrefix(s, "*."):
 			s, nt = s[2:], nodeTypeParentOnly
 			wildcard = true
 		case strings.HasPrefix(s, "!"):
 			s, nt = s[1:], nodeTypeException
 		}
 		// Descend from the rightmost label (the TLD) to the leftmost one,
 		// creating nodes as needed.
 		labels := strings.Split(s, ".")
 		for n, i := &root, len(labels)-1; i >= 0; i-- {
 			label := labels[i]
 			n = n.child(label)
 			if i == 0 {
 				// Only upgrade a node that is still parent-only, so an
 				// earlier normal or exception rule is not overwritten.
 				if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
 					n.nodeType = nt
 				}
 				n.wildcard = n.wildcard || wildcard
 			}
 			labelsMap[label] = true
 		}
 	}
 	labelsList = make([]string, 0, len(labelsMap))
 	for label := range labelsMap {
 		labelsList = append(labelsList, label)
 	}
 	sort.Strings(labelsList)

 	p := printReal
 	if *test {
 		p = printTest
 	}
 	if err := p(buf, &root); err != nil {
 		return err
 	}

 	b, err := format.Source(buf.Bytes())
 	if err != nil {
 		return err
 	}
 	_, err = os.Stdout.Write(b)
 	return err
 }

 // printTest writes the source of table_test.go to w: the rules slice in
 // input order, followed by the label of every non-root node in walk order.
 // It returns the first write or walk error encountered.
 func printTest(w io.Writer, n *node) error {
 	if _, err := fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n"); err != nil {
 		return err
 	}
 	if _, err := fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n"); err != nil {
 		return err
 	}
 	for _, rule := range rules {
 		if _, err := fmt.Fprintf(w, "%q,\n", rule); err != nil {
 			return err
 		}
 	}
 	if _, err := fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n"); err != nil {
 		return err
 	}
 	if err := n.walk(w, printNodeLabel); err != nil {
 		return err
 	}
 	_, err := fmt.Fprintf(w, "}\n")
 	return err
 }

 // printReal writes the source of table.go to w: the version and node type
 // constants, the number of top level domains (the root's child count), the
 // combined label text, and the encoded nodes table.
 //
 // As a side effect it fills labelEncoding for every label in labelsList and
 // assigns node indexes via assignNodeIndexes. It returns an error if a label
 // cannot be located or encoded, or if writing to w fails.
 func printReal(w io.Writer, n *node) error {
 	const header = `// generated by go run gen.go; DO NOT EDIT

 package publicsuffix

 const version = %q

 const (
 	nodeTypeNormal     = %d
 	nodeTypeException  = %d
 	nodeTypeParentOnly = %d
 )

 // numTLD is the number of top level domains.
 const numTLD = %d

 `
 	if _, err := fmt.Fprintf(w, header, *version, nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children)); err != nil {
 		return err
 	}

 	text := makeText()
 	if text == "" {
 		return fmt.Errorf("internal error: makeText returned no text")
 	}
 	// Each label is stored as a 24-bit offset into text plus an 8-bit length.
 	for _, label := range labelsList {
 		offset, length := strings.Index(text, label), len(label)
 		if offset < 0 {
 			return fmt.Errorf("internal error: could not find %q in text %q", label, text)
 		}
 		if offset >= 1<<24 || length >= 1<<8 {
 			return fmt.Errorf("text offset/length is too large: %d/%d", offset, length)
 		}
 		labelEncoding[label] = uint32(offset)<<8 | uint32(length)
 	}
 	if _, err := fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = "); err != nil {
 		return err
 	}
 	// Emit the text as a concatenation of string literals of at most 64 bytes.
 	for len(text) > 0 {
 		chunk, plus := len(text), ""
 		if chunk > 64 {
 			chunk, plus = 64, " +"
 		}
 		if _, err := fmt.Fprintf(w, "%q%s\n", text[:chunk], plus); err != nil {
 			return err
 		}
 		text = text[chunk:]
 	}

 	if err := n.walk(w, assignNodeIndexes); err != nil {
 		return err
 	}

 	if _, err := fmt.Fprintf(w, `

 // Nodes is the list of nodes. Each node is encoded as two uint32 values.
 //
 // The first uint32 encodes the node's children, nodeType, and a wildcard bit.
 // In the //-comment after each node's data, the indexes of the children are
 // formatted as (0x1234-0x1256). The nodeType is printed as + for normal, ! for
 // exception, and o for parent-only nodes that have children but don't match a
 // domain in their own right. The * denotes the wildcard bit. The layout within
 // the uint32, from MSB to LSB, is:
 //	[2] nodeType [1] wildcard [13] number of children [16] first child.
 // If a node has no children then the low 29 bits are zero.
 //
 // The second uint32 encodes the node's text. The layout is:
 //	[24] text offset [8] text length.
 //
 // TODO(nigeltao): this table has a lot of zeroes, for childless nodes. It
 // would be tight, but it should be possible to use only 32 bits per node
 // instead of 64, with an offset into a parent-child table. A back-of-the-
 // envelope calculation suggests that at 6000 rows (of which 90%% are leaves),
 // this could save an extra 20KiB of data.
 var nodes = [...][2]uint32{
 `); err != nil {
 		return err
 	}
 	if err := n.walk(w, printNode); err != nil {
 		return err
 	}
 	_, err := fmt.Fprintf(w, "}\n")
 	return err
 }

 // node is one label in the tree of public suffix rules. The root node is the
 // zero value declared in main1; its children are the top level domains.
 type node struct {
 	// label is this node's single domain label.
 	label    string
 	// nodeType is one of the nodeTypeXxx constants.
 	nodeType int
 	// wildcard is set when a "*." rule was read for this node's name.
 	wildcard bool
 	// index is the index of this node in the nodes array.
 	index int
 	// firstChild is the index of this node's first child, or zero if this
 	// node has no children.
 	firstChild int
 	// children are the node's children, in strictly increasing node label order.
 	children []*node
 }

 // walk calls f on n and then on every descendant of n, parents before
 // children and siblings in slice order. It stops at, and returns, the first
 // non-nil error from f.
 func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
 	err := f(w, n)
 	if err != nil {
 		return err
 	}
 	kids := n.children
 	for i := 0; i < len(kids); i++ {
 		if err = kids[i].walk(w, f); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // child looks up the direct child of n named label. If there is none, a new
 // parent-only child is appended and n.children is re-sorted by label before
 // the new child is returned.
 func (n *node) child(label string) *node {
 	for i := range n.children {
 		if existing := n.children[i]; existing.label == label {
 			return existing
 		}
 	}
 	created := new(node)
 	created.label = label
 	created.nodeType = nodeTypeParentOnly
 	n.children = append(n.children, created)
 	sort.Sort(byLabel(n.children))
 	return created
 }

 // byLabel implements sort.Interface, ordering nodes by ascending label.
 type byLabel []*node

 // Len reports the number of nodes.
 func (s byLabel) Len() int {
 	return len(s)
 }

 // Swap exchanges the nodes at positions i and j.
 func (s byLabel) Swap(i, j int) {
 	s[j], s[i] = s[i], s[j]
 }

 // Less reports whether the label at i sorts before the label at j.
 func (s byLabel) Less(i, j int) bool {
 	return s[j].label > s[i].label
 }

 // nextNodeIndex is the next unused index in the generated nodes array.
 var nextNodeIndex int

 // assignNodeIndexes gives the children of n consecutive indexes starting at
 // nextNodeIndex and records the first of them in n.firstChild. Nodes without
 // children are left untouched. The writer is unused; the signature matches
 // what walk expects. It always returns nil.
 func assignNodeIndexes(w io.Writer, n *node) error {
 	if len(n.children) == 0 {
 		return nil
 	}
 	n.firstChild = nextNodeIndex
 	for i := range n.children {
 		n.children[i].index = nextNodeIndex
 		nextNodeIndex++
 	}
 	return nil
 }

 // printNode writes one table row for each direct child of n: the two encoded
 // uint32 values followed by a comment showing the child's index, its range of
 // children, its node type marker, its wildcard marker and its label.
 //
 // It returns an error if a child's firstChild or child count does not fit in
 // the 16-bit and 13-bit fields of the encoding, or if writing to w fails.
 func printNode(w io.Writer, n *node) error {
 	for _, c := range n.children {
 		s := "-------------"
 		if len(c.children) != 0 {
 			s = fmt.Sprintf("0x%04x-0x%04x", c.firstChild, c.firstChild+len(c.children))
 		}
 		wildcardBit, wildcardStr := uint32(0), ' '
 		if c.wildcard {
 			wildcardBit, wildcardStr = 1<<29, '*'
 		}
 		if c.firstChild >= 1<<16 || len(c.children) >= 1<<13 {
 			return fmt.Errorf("nodes offset/length is too large: %d/%d", c.firstChild, len(c.children))
 		}
 		// Convert to uint32 before shifting so the shifts never overflow int
 		// on 32-bit platforms.
 		encoding := uint32(c.nodeType)<<30 | wildcardBit | uint32(len(c.children))<<16 | uint32(c.firstChild)
 		_, err := fmt.Fprintf(w, "{0x%08x, 0x%08x}, // 0x%04x (%s) %s%c %s\n",
 			encoding, labelEncoding[c.label], c.index, s,
 			nodeTypeString(c.nodeType), wildcardStr, c.label,
 		)
 		if err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // printNodeLabel writes the quoted label of each direct child of n to w, one
 // per line with a trailing comma. It returns the first write error.
 func printNodeLabel(w io.Writer, n *node) error {
 	for _, c := range n.children {
 		if _, err := fmt.Fprintf(w, "%q,\n", c.label); err != nil {
 			return err
 		}
 	}
 	return nil
 }

 // makeText builds the single string that holds every label in labelsList.
 // Without the crush flag the labels are simply concatenated. With it, labels
 // contained in other labels are dropped and labels whose suffix matches
 // another label's prefix are overlapped: "arpa" and "parliament" could yield
 // "arparliament".
 func makeText() string {
 	if !*crush {
 		return strings.Join(labelsList, "")
 	}

 	// Total size before crushing; only reported in verbose mode.
 	total := 0
 	for _, label := range labelsList {
 		total += len(label)
 	}

 	// Work on a private copy so that labelsList itself is left untouched.
 	ss := make([]string, len(labelsList))
 	copy(ss, labelsList)

 	// Blank out every string that occurs inside another one, repeating until
 	// a full pass changes nothing.
 	for {
 		changed := false
 		for i := range ss {
 			outer := ss[i]
 			if outer == "" {
 				continue
 			}
 			for j := range ss {
 				inner := ss[j]
 				if i == j || inner == "" || !strings.Contains(outer, inner) {
 					continue
 				}
 				ss[j] = ""
 				changed = true
 			}
 		}
 		if !changed {
 			break
 		}
 	}

 	// After sorting, the blanked entries are all at the front; skip them.
 	sort.Strings(ss)
 	firstKept := 0
 	for firstKept < len(ss) && ss[firstKept] == "" {
 		firstKept++
 	}
 	ss = ss[firstKept:]

 	// Repeatedly merge the pair with the longest suffix/prefix overlap until
 	// no two strings overlap at all.
 	for {
 		// Look for i, j, k with ss[i][len-k:] == ss[j][:k] and k as large as
 		// possible; ties keep the first pair found.
 		besti, bestj, bestk := -1, -1, 0
 		for i, s := range ss {
 			if s == "" {
 				continue
 			}
 			for j, t := range ss {
 				if i == j {
 					continue
 				}
 				limit := len(s)
 				if len(t) < limit {
 					limit = len(t)
 				}
 				for k := bestk + 1; k <= limit; k++ {
 					if strings.HasSuffix(s, t[:k]) {
 						besti, bestj, bestk = i, j, k
 					}
 				}
 			}
 		}
 		if bestk == 0 {
 			break
 		}
 		if *v {
 			fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d) out of (%4d,%4d): %q and %q\n",
 				bestk, besti, bestj, len(ss), len(ss), ss[besti], ss[bestj])
 		}
 		ss[besti] += ss[bestj][bestk:]
 		ss[bestj] = ""
 	}

 	text := strings.Join(ss, "")
 	if *v {
 		fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", total, len(text))
 	}
 	return text
 }
	// Copyright 2012 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// +build ignore

	package main

	// This program generates table.go and table_test.go.
	// Invoke as:
	//
	// go run gen.go -version "xxx" >table.go
	// go run gen.go -version "xxx" -test >table_test.go
	//
	// The version is derived from information found at
	// http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat
	// which is linked from http://publicsuffix.org/list/.
	//
	// To fetch a particular hg revision, such as 05b11a8d1ace, pass
	// -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"

	import (
	"bufio"
	"bytes"
	"flag"
	"fmt"
	"go/format"
	"io"
	"net/http"
	"os"
	"sort"
	"strings"

	"code.google.com/p/go.net/idna"
	)

	// Node types. The same numeric values are written into the generated table.go
	// by printReal, and are packed into the top two bits of each encoded node by
	// printNode.
	const (
	nodeTypeNormal = 0 // rendered as "+" by nodeTypeString
	nodeTypeException = 1 // rendered as "!"; used for rules that start with "!"
	nodeTypeParentOnly = 2 // rendered as "o"; the type every node starts with in child
	)

	// nodeTypeString returns the one-character marker used for a node type in the
	// comments of the generated table: "+" for normal, "!" for exception and "o"
	// for parent-only. Any other value is a programming error and panics.
	func nodeTypeString(n int) string {
		if n == nodeTypeNormal {
			return "+"
		}
		if n == nodeTypeException {
			return "!"
		}
		if n == nodeTypeParentOnly {
			return "o"
		}
		panic("unreachable")
	}

	var (
	// labelEncoding maps a label to its packed (text offset << 8 | length)
	// value. It is filled in by printReal and read by printNode.
	labelEncoding = map[string]uint32{}
	// labelsList is the sorted list of distinct labels, rebuilt by main1 from
	// labelsMap once all rules have been read.
	labelsList = []string{}
	// labelsMap is the set of every label seen while reading the rules.
	labelsMap = map[string]bool{}
	// rules holds every accepted rule, in input order, after IDNA conversion.
	rules = []string{}

	// Command-line flags.
	crush = flag.Bool("crush", true, "make the generated node text as small as possible")
	subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
	url = flag.String("url",
	"http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1",
	"URL of the publicsuffix.org list. If empty, stdin is read instead")
	v = flag.Bool("v", false, "verbose output (to stderr)")
	version = flag.String("version", "", "the effective_tld_names.dat version")
	test = flag.Bool("test", false, "generate table_test.go")
	)

	// main runs main1 and, if it fails, reports the error on stderr and exits
	// with status 1.
	func main() {
		err := main1()
		if err == nil {
			return
		}
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	func main1() error {
	flag.Parse()
	if *version == "" {
	return fmt.Errorf("-version was not specified")
	}
	var r io.Reader = os.Stdin
	if *url != "" {
	res, err := http.Get(*url)
	if err != nil {
	return err
	}
	if res.StatusCode != http.StatusOK {
	return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
	}
	r = res.Body
	defer res.Body.Close()
	}

	var root node
	buf := new(bytes.Buffer)
	br := bufio.NewReader(r)
	for {
	s, err := br.ReadString('\n')
	if err != nil {
	if err == io.EOF {
	break
	}
	return err
	}
	s = strings.TrimSpace(s)
	if s == "" \|\| strings.HasPrefix(s, "//") {
	continue
	}
	s, err = idna.ToASCII(s)
	if err != nil {
	return err
	}

	if *subset {
	switch {
	case s == "ao" \|\| strings.HasSuffix(s, ".ao"):
	case s == "ar" \|\| strings.HasSuffix(s, ".ar"):
	case s == "arpa" \|\| strings.HasSuffix(s, ".arpa"):
	case s == "jp":
	case s == "kobe.jp" \|\| strings.HasSuffix(s, ".kobe.jp"):
	case s == "kyoto.jp" \|\| strings.HasSuffix(s, ".kyoto.jp"):
	case s == "uk" \|\| strings.HasSuffix(s, ".uk"):
	case s == "tw" \|\| strings.HasSuffix(s, ".tw"):
	case s == "zw" \|\| strings.HasSuffix(s, ".zw"):
	case s == "xn--p1ai" \|\| strings.HasSuffix(s, ".xn--p1ai"):
	// xn--p1ai is Russian-Cyrillic "рф".
	default:
	continue
	}
	}

	rules = append(rules, s)

	nt, wildcard := nodeTypeNormal, false
	switch {
	case strings.HasPrefix(s, "*."):
	s, nt = s[2:], nodeTypeParentOnly
	wildcard = true
	case strings.HasPrefix(s, "!"):
	s, nt = s[1:], nodeTypeException
	}
	labels := strings.Split(s, ".")
	for n, i := &root, len(labels)-1; i >= 0; i-- {
	label := labels[i]
	n = n.child(label)
	if i == 0 {
	if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
	n.nodeType = nt
	}
	n.wildcard = n.wildcard \|\| wildcard
	}
	labelsMap[label] = true
	}
	}
	labelsList = make([]string, 0, len(labelsMap))
	for label := range labelsMap {
	labelsList = append(labelsList, label)
	}
	sort.Strings(labelsList)

	p := printReal
	if *test {
	p = printTest
	}
	if err := p(buf, &root); err != nil {
	return err
	}

	b, err := format.Source(buf.Bytes())
	if err != nil {
	return err
	}
	_, err = os.Stdout.Write(b)
	return err
	}

	// printTest writes the source of table_test.go to w: the rules slice in
	// input order, followed by the label of every non-root node in walk order.
	// It returns the first write or walk error encountered.
	func printTest(w io.Writer, n *node) error {
		if _, err := fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n"); err != nil {
			return err
		}
		if _, err := fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n"); err != nil {
			return err
		}
		for _, rule := range rules {
			if _, err := fmt.Fprintf(w, "%q,\n", rule); err != nil {
				return err
			}
		}
		if _, err := fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n"); err != nil {
			return err
		}
		if err := n.walk(w, printNodeLabel); err != nil {
			return err
		}
		_, err := fmt.Fprintf(w, "}\n")
		return err
	}

	// printReal writes the source of table.go to w: the version and node type
	// constants, the number of top level domains (the root's child count), the
	// combined label text, and the encoded nodes table.
	//
	// As a side effect it fills labelEncoding for every label in labelsList and
	// assigns node indexes via assignNodeIndexes. It returns an error if a label
	// cannot be located or encoded, or if writing to w fails.
	func printReal(w io.Writer, n *node) error {
		const header = `// generated by go run gen.go; DO NOT EDIT

	package publicsuffix

	const version = %q

	const (
	nodeTypeNormal = %d
	nodeTypeException = %d
	nodeTypeParentOnly = %d
	)

	// numTLD is the number of top level domains.
	const numTLD = %d

	`
		if _, err := fmt.Fprintf(w, header, *version, nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children)); err != nil {
			return err
		}

		text := makeText()
		if text == "" {
			return fmt.Errorf("internal error: makeText returned no text")
		}
		// Each label is stored as a 24-bit offset into text plus an 8-bit length.
		for _, label := range labelsList {
			offset, length := strings.Index(text, label), len(label)
			if offset < 0 {
				return fmt.Errorf("internal error: could not find %q in text %q", label, text)
			}
			if offset >= 1<<24 || length >= 1<<8 {
				return fmt.Errorf("text offset/length is too large: %d/%d", offset, length)
			}
			labelEncoding[label] = uint32(offset)<<8 | uint32(length)
		}
		if _, err := fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = "); err != nil {
			return err
		}
		// Emit the text as a concatenation of string literals of at most 64 bytes.
		for len(text) > 0 {
			chunk, plus := len(text), ""
			if chunk > 64 {
				chunk, plus = 64, " +"
			}
			if _, err := fmt.Fprintf(w, "%q%s\n", text[:chunk], plus); err != nil {
				return err
			}
			text = text[chunk:]
		}

		if err := n.walk(w, assignNodeIndexes); err != nil {
			return err
		}

		if _, err := fmt.Fprintf(w, `

	// Nodes is the list of nodes. Each node is encoded as two uint32 values.
	//
	// The first uint32 encodes the node's children, nodeType, and a wildcard bit.
	// In the //-comment after each node's data, the indexes of the children are
	// formatted as (0x1234-0x1256). The nodeType is printed as + for normal, ! for
	// exception, and o for parent-only nodes that have children but don't match a
	// domain in their own right. The * denotes the wildcard bit. The layout within
	// the uint32, from MSB to LSB, is:
	// [2] nodeType [1] wildcard [13] number of children [16] first child.
	// If a node has no children then the low 29 bits are zero.
	//
	// The second uint32 encodes the node's text. The layout is:
	// [24] text offset [8] text length.
	//
	// TODO(nigeltao): this table has a lot of zeroes, for childless nodes. It
	// would be tight, but it should be possible to use only 32 bits per node
	// instead of 64, with an offset into a parent-child table. A back-of-the-
	// envelope calculation suggests that at 6000 rows (of which 90%% are leaves),
	// this could save an extra 20KiB of data.
	var nodes = [...][2]uint32{
	`); err != nil {
			return err
		}
		if err := n.walk(w, printNode); err != nil {
			return err
		}
		_, err := fmt.Fprintf(w, "}\n")
		return err
	}

	// node is one label in the tree of public suffix rules. The root node is the
	// zero value declared in main1; its children are the top level domains.
	type node struct {
	// label is this node's single domain label.
	label string
	// nodeType is one of the nodeTypeXxx constants.
	nodeType int
	// wildcard is set when a "*." rule was read for this node's name.
	wildcard bool
	// index is the index of this node in the nodes array.
	index int
	// firstChild is the index of this node's first child, or zero if this
	// node has no children.
	firstChild int
	// children are the node's children, in strictly increasing node label order.
	children []*node
	}

	// walk calls f on n and then on every descendant of n, parents before
	// children and siblings in slice order. It stops at, and returns, the first
	// non-nil error from f.
	//
	// The receiver and the callback argument are pointers so that callbacks such
	// as assignNodeIndexes can modify the nodes they visit.
	func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
		if err := f(w, n); err != nil {
			return err
		}
		for _, c := range n.children {
			if err := c.walk(w, f); err != nil {
				return err
			}
		}
		return nil
	}

	// child returns the child of n with the given label. The child is created,
	// as a parent-only node, if it did not exist beforehand, and n.children is
	// kept sorted by label.
	//
	// The receiver must be a pointer: appending to n.children has to be visible
	// to the caller, and the returned child is shared with the tree.
	func (n *node) child(label string) *node {
		for _, c := range n.children {
			if c.label == label {
				return c
			}
		}
		c := &node{
			label:    label,
			nodeType: nodeTypeParentOnly,
		}
		n.children = append(n.children, c)
		sort.Sort(byLabel(n.children))
		return c
	}

	// byLabel implements sort.Interface, ordering nodes by ascending label.
	type byLabel []*node

	// Len reports the number of nodes.
	func (s byLabel) Len() int {
		return len(s)
	}

	// Swap exchanges the nodes at positions i and j.
	func (s byLabel) Swap(i, j int) {
		s[j], s[i] = s[i], s[j]
	}

	// Less reports whether the label at i sorts before the label at j.
	func (s byLabel) Less(i, j int) bool {
		return s[j].label > s[i].label
	}

	// nextNodeIndex is the next unused index in the generated nodes array.
	var nextNodeIndex int

	// assignNodeIndexes gives the children of n consecutive indexes starting at
	// nextNodeIndex and records the first of them in n.firstChild. Nodes without
	// children are left untouched. The writer is unused; the signature matches
	// what walk expects. It always returns nil.
	func assignNodeIndexes(w io.Writer, n *node) error {
		if len(n.children) == 0 {
			return nil
		}
		n.firstChild = nextNodeIndex
		for i := range n.children {
			n.children[i].index = nextNodeIndex
			nextNodeIndex++
		}
		return nil
	}

	func printNode(w io.Writer, n *node) error {
	for _, c := range n.children {
	s := "-------------"
	if len(c.children) != 0 {
	s = fmt.Sprintf("0x%04x-0x%04x", c.firstChild, c.firstChild+len(c.children))
	}
	wildcardBit, wildcardStr := uint32(0), ' '
	if c.wildcard {
	wildcardBit, wildcardStr = 1<<29, '*'
	}
	if c.firstChild >= 1<<16 \|\| len(c.children) >= 1<<13 {
	return fmt.Errorf("nodes offset/length is too large: %d/%d", c.firstChild, len(c.children))
	}
	encoding := uint32(c.nodeType<<30) \| wildcardBit \| uint32(len(c.children)<<16) \| uint32(c.firstChild)
	fmt.Fprintf(w, "{0x%08x, 0x%08x}, // 0x%04x (%s) %s%c %s\n",
	encoding, labelEncoding[c.label], c.index, s,
	nodeTypeString(c.nodeType), wildcardStr, c.label,
	)
	}
	return nil
	}

	// printNodeLabel writes the quoted label of each direct child of n to w, one
	// per line with a trailing comma. It returns the first write error.
	func printNodeLabel(w io.Writer, n *node) error {
		for _, c := range n.children {
			if _, err := fmt.Fprintf(w, "%q,\n", c.label); err != nil {
				return err
			}
		}
		return nil
	}

	// makeText builds the single string that holds every label in labelsList.
	// Without the crush flag the labels are simply concatenated. With it, labels
	// contained in other labels are dropped and labels whose suffix matches
	// another label's prefix are overlapped: "arpa" and "parliament" could yield
	// "arparliament".
	func makeText() string {
		if !*crush {
			return strings.Join(labelsList, "")
		}

		// Total size before crushing; only reported in verbose mode.
		total := 0
		for _, label := range labelsList {
			total += len(label)
		}

		// Work on a private copy so that labelsList itself is left untouched.
		ss := make([]string, len(labelsList))
		copy(ss, labelsList)

		// Blank out every string that occurs inside another one, repeating until
		// a full pass changes nothing.
		for {
			changed := false
			for i := range ss {
				outer := ss[i]
				if outer == "" {
					continue
				}
				for j := range ss {
					inner := ss[j]
					if i == j || inner == "" || !strings.Contains(outer, inner) {
						continue
					}
					ss[j] = ""
					changed = true
				}
			}
			if !changed {
				break
			}
		}

		// After sorting, the blanked entries are all at the front; skip them.
		sort.Strings(ss)
		firstKept := 0
		for firstKept < len(ss) && ss[firstKept] == "" {
			firstKept++
		}
		ss = ss[firstKept:]

		// Repeatedly merge the pair with the longest suffix/prefix overlap until
		// no two strings overlap at all.
		for {
			// Look for i, j, k with ss[i][len-k:] == ss[j][:k] and k as large as
			// possible; ties keep the first pair found.
			besti, bestj, bestk := -1, -1, 0
			for i, s := range ss {
				if s == "" {
					continue
				}
				for j, t := range ss {
					if i == j {
						continue
					}
					limit := len(s)
					if len(t) < limit {
						limit = len(t)
					}
					for k := bestk + 1; k <= limit; k++ {
						if strings.HasSuffix(s, t[:k]) {
							besti, bestj, bestk = i, j, k
						}
					}
				}
			}
			if bestk == 0 {
				break
			}
			if *v {
				fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d) out of (%4d,%4d): %q and %q\n",
					bestk, besti, bestj, len(ss), len(ss), ss[besti], ss[bestj])
			}
			ss[besti] += ss[bestj][bestk:]
			ss[bestj] = ""
		}

		text := strings.Join(ss, "")
		if *v {
			fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", total, len(text))
		}
		return text
	}