blob: 71e458d8cb21efd7d568f1ed9141bc6858321eae [file] [log] [blame]
Nigel Tao67a30482012-12-12 15:58:52 +11001// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// +build ignore
6
7package main
8
9// This program generates table.go and table_test.go.
10// Invoke as:
11//
12// go run gen.go -version "xxx" >table.go
13// go run gen.go -version "xxx" -test >table_test.go
14//
15// The version is derived from information found at
16// http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat
17// which is linked from http://publicsuffix.org/list/.
18//
19// To fetch a particular hg revision, such as 05b11a8d1ace, pass
20// -url "http://hg.mozilla.org/mozilla-central/raw-file/05b11a8d1ace/netwerk/dns/effective_tld_names.dat"
21
Nigel Tao67a30482012-12-12 15:58:52 +110022import (
23 "bufio"
24 "bytes"
25 "flag"
26 "fmt"
27 "go/format"
28 "io"
29 "net/http"
30 "os"
31 "sort"
32 "strings"
Nigel Taocbecf2f2012-12-20 19:36:00 +110033
34 "code.google.com/p/go.net/idna"
Nigel Tao67a30482012-12-12 15:58:52 +110035)
36
37const (
Nigel Tao0f34b772012-12-22 12:09:13 +110038 nodesBitsChildren = 9
Nigel Taob8ab5102013-01-09 22:10:50 +110039 nodesBitsICANN = 1
Nigel Tao0f34b772012-12-22 12:09:13 +110040 nodesBitsTextOffset = 15
41 nodesBitsTextLength = 6
42
43 childrenBitsWildcard = 1
Nigel Taob8ab5102013-01-09 22:10:50 +110044 childrenBitsNodeType = 2
Nigel Tao0f34b772012-12-22 12:09:13 +110045 childrenBitsHi = 14
46 childrenBitsLo = 14
47)
48
49const (
Nigel Tao67a30482012-12-12 15:58:52 +110050 nodeTypeNormal = 0
51 nodeTypeException = 1
52 nodeTypeParentOnly = 2
Nigel Taob8ab5102013-01-09 22:10:50 +110053 numNodeType = 3
Nigel Tao67a30482012-12-12 15:58:52 +110054)
55
Nigel Taob8ab5102013-01-09 22:10:50 +110056func nodeTypeStr(n int) string {
Nigel Tao67a30482012-12-12 15:58:52 +110057 switch n {
58 case nodeTypeNormal:
59 return "+"
60 case nodeTypeException:
61 return "!"
62 case nodeTypeParentOnly:
63 return "o"
64 }
65 panic("unreachable")
66}
67
68var (
69 labelEncoding = map[string]uint32{}
70 labelsList = []string{}
71 labelsMap = map[string]bool{}
72 rules = []string{}
73
74 crush = flag.Bool("crush", true, "make the generated node text as small as possible")
75 subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
76 url = flag.String("url",
77 "http://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1",
78 "URL of the publicsuffix.org list. If empty, stdin is read instead")
79 v = flag.Bool("v", false, "verbose output (to stderr)")
80 version = flag.String("version", "", "the effective_tld_names.dat version")
81 test = flag.Bool("test", false, "generate table_test.go")
82)
83
84func main() {
85 if err := main1(); err != nil {
86 fmt.Fprintln(os.Stderr, err)
87 os.Exit(1)
88 }
89}
90
91func main1() error {
92 flag.Parse()
Nigel Taob8ab5102013-01-09 22:10:50 +110093 if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
Nigel Tao0f34b772012-12-22 12:09:13 +110094 return fmt.Errorf("not enough bits to encode the nodes table")
95 }
Nigel Taob8ab5102013-01-09 22:10:50 +110096 if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
Nigel Tao0f34b772012-12-22 12:09:13 +110097 return fmt.Errorf("not enough bits to encode the children table")
98 }
Nigel Tao67a30482012-12-12 15:58:52 +110099 if *version == "" {
100 return fmt.Errorf("-version was not specified")
101 }
102 var r io.Reader = os.Stdin
103 if *url != "" {
104 res, err := http.Get(*url)
105 if err != nil {
106 return err
107 }
108 if res.StatusCode != http.StatusOK {
109 return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
110 }
111 r = res.Body
112 defer res.Body.Close()
113 }
114
115 var root node
Nigel Taob8ab5102013-01-09 22:10:50 +1100116 icann := false
Nigel Tao67a30482012-12-12 15:58:52 +1100117 buf := new(bytes.Buffer)
118 br := bufio.NewReader(r)
119 for {
120 s, err := br.ReadString('\n')
121 if err != nil {
122 if err == io.EOF {
123 break
124 }
125 return err
126 }
127 s = strings.TrimSpace(s)
Nigel Taob8ab5102013-01-09 22:10:50 +1100128 if strings.Contains(s, "BEGIN ICANN DOMAINS") {
129 icann = true
130 continue
131 }
132 if strings.Contains(s, "END ICANN DOMAINS") {
133 icann = false
134 continue
135 }
Nigel Taocbecf2f2012-12-20 19:36:00 +1100136 if s == "" || strings.HasPrefix(s, "//") {
Nigel Tao67a30482012-12-12 15:58:52 +1100137 continue
138 }
Nigel Taocbecf2f2012-12-20 19:36:00 +1100139 s, err = idna.ToASCII(s)
140 if err != nil {
141 return err
142 }
Nigel Tao67a30482012-12-12 15:58:52 +1100143
144 if *subset {
145 switch {
Nigel Tao61791142013-01-22 21:23:30 +1100146 case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
147 case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
Nigel Tao67a30482012-12-12 15:58:52 +1100148 case s == "ao" || strings.HasSuffix(s, ".ao"):
149 case s == "ar" || strings.HasSuffix(s, ".ar"):
150 case s == "arpa" || strings.HasSuffix(s, ".arpa"):
Nigel Tao61791142013-01-22 21:23:30 +1100151 case s == "cy" || strings.HasSuffix(s, ".cy"):
Nigel Taob8ab5102013-01-09 22:10:50 +1100152 case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
Nigel Tao67a30482012-12-12 15:58:52 +1100153 case s == "jp":
154 case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
155 case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
Nigel Tao61791142013-01-22 21:23:30 +1100156 case s == "om" || strings.HasSuffix(s, ".om"):
Nigel Tao67a30482012-12-12 15:58:52 +1100157 case s == "uk" || strings.HasSuffix(s, ".uk"):
Nigel Tao61791142013-01-22 21:23:30 +1100158 case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
Nigel Taocbecf2f2012-12-20 19:36:00 +1100159 case s == "tw" || strings.HasSuffix(s, ".tw"):
Nigel Tao67a30482012-12-12 15:58:52 +1100160 case s == "zw" || strings.HasSuffix(s, ".zw"):
Nigel Taocbecf2f2012-12-20 19:36:00 +1100161 case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
162 // xn--p1ai is Russian-Cyrillic "рф".
Nigel Tao67a30482012-12-12 15:58:52 +1100163 default:
164 continue
165 }
166 }
167
168 rules = append(rules, s)
169
170 nt, wildcard := nodeTypeNormal, false
171 switch {
172 case strings.HasPrefix(s, "*."):
173 s, nt = s[2:], nodeTypeParentOnly
174 wildcard = true
175 case strings.HasPrefix(s, "!"):
176 s, nt = s[1:], nodeTypeException
177 }
178 labels := strings.Split(s, ".")
179 for n, i := &root, len(labels)-1; i >= 0; i-- {
180 label := labels[i]
181 n = n.child(label)
182 if i == 0 {
183 if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
184 n.nodeType = nt
185 }
Nigel Taob8ab5102013-01-09 22:10:50 +1100186 n.icann = n.icann && icann
Nigel Tao67a30482012-12-12 15:58:52 +1100187 n.wildcard = n.wildcard || wildcard
188 }
189 labelsMap[label] = true
190 }
191 }
192 labelsList = make([]string, 0, len(labelsMap))
193 for label := range labelsMap {
194 labelsList = append(labelsList, label)
195 }
196 sort.Strings(labelsList)
197
198 p := printReal
199 if *test {
200 p = printTest
201 }
202 if err := p(buf, &root); err != nil {
203 return err
204 }
205
206 b, err := format.Source(buf.Bytes())
207 if err != nil {
208 return err
209 }
210 _, err = os.Stdout.Write(b)
211 return err
212}
213
Nigel Tao67a30482012-12-12 15:58:52 +1100214func printTest(w io.Writer, n *node) error {
215 fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
216 fmt.Fprintf(w, "package publicsuffix\n\nvar rules = [...]string{\n")
217 for _, rule := range rules {
218 fmt.Fprintf(w, "%q,\n", rule)
219 }
220 fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
221 if err := n.walk(w, printNodeLabel); err != nil {
222 return err
223 }
224 fmt.Fprintf(w, "}\n")
225 return nil
226}
227
228func printReal(w io.Writer, n *node) error {
229 const header = `// generated by go run gen.go; DO NOT EDIT
230
231package publicsuffix
232
233const version = %q
234
235const (
Nigel Tao0f34b772012-12-22 12:09:13 +1100236 nodesBitsChildren = %d
Nigel Taob8ab5102013-01-09 22:10:50 +1100237 nodesBitsICANN = %d
Nigel Tao0f34b772012-12-22 12:09:13 +1100238 nodesBitsTextOffset = %d
239 nodesBitsTextLength = %d
240
241 childrenBitsWildcard = %d
Nigel Taob8ab5102013-01-09 22:10:50 +1100242 childrenBitsNodeType = %d
Nigel Tao0f34b772012-12-22 12:09:13 +1100243 childrenBitsHi = %d
244 childrenBitsLo = %d
245)
246
247const (
Nigel Tao67a30482012-12-12 15:58:52 +1100248 nodeTypeNormal = %d
249 nodeTypeException = %d
250 nodeTypeParentOnly = %d
251)
252
253// numTLD is the number of top level domains.
254const numTLD = %d
255
256`
Nigel Tao0f34b772012-12-22 12:09:13 +1100257 fmt.Fprintf(w, header, *version,
Nigel Taob8ab5102013-01-09 22:10:50 +1100258 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
259 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
Nigel Tao0f34b772012-12-22 12:09:13 +1100260 nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
Nigel Tao67a30482012-12-12 15:58:52 +1100261
262 text := makeText()
263 if text == "" {
264 return fmt.Errorf("internal error: makeText returned no text")
265 }
266 for _, label := range labelsList {
267 offset, length := strings.Index(text, label), len(label)
268 if offset < 0 {
269 return fmt.Errorf("internal error: could not find %q in text %q", label, text)
270 }
Nigel Tao0f34b772012-12-22 12:09:13 +1100271 if offset >= 1<<nodesBitsTextOffset || length >= 1<<nodesBitsTextLength {
Nigel Tao67a30482012-12-12 15:58:52 +1100272 return fmt.Errorf("text offset/length is too large: %d/%d", offset, length)
273 }
Nigel Tao0f34b772012-12-22 12:09:13 +1100274 labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
Nigel Tao67a30482012-12-12 15:58:52 +1100275 }
276 fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
277 for len(text) > 0 {
278 n, plus := len(text), ""
279 if n > 64 {
280 n, plus = 64, " +"
281 }
282 fmt.Fprintf(w, "%q%s\n", text[:n], plus)
283 text = text[n:]
284 }
285
Nigel Tao0f34b772012-12-22 12:09:13 +1100286 n.walk(w, assignIndexes)
Nigel Tao67a30482012-12-12 15:58:52 +1100287
288 fmt.Fprintf(w, `
289
Nigel Tao0f34b772012-12-22 12:09:13 +1100290// nodes is the list of nodes. Each node is represented as a uint32, which
Nigel Taob8ab5102013-01-09 22:10:50 +1100291// encodes the node's children, wildcard bit and node type (as an index into
292// the children array), ICANN bit and text.
Nigel Tao67a30482012-12-12 15:58:52 +1100293//
Nigel Tao0f34b772012-12-22 12:09:13 +1100294// In the //-comment after each node's data, the nodes indexes of the children
295// are formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
296// nodeType is printed as + for normal, ! for exception, and o for parent-only
297// nodes that have children but don't match a domain label in their own right.
Nigel Taob8ab5102013-01-09 22:10:50 +1100298// An I denotes an ICANN domain.
Nigel Tao67a30482012-12-12 15:58:52 +1100299//
Nigel Tao0f34b772012-12-22 12:09:13 +1100300// The layout within the uint32, from MSB to LSB, is:
301// [%2d bits] unused
302// [%2d bits] children index
Nigel Taob8ab5102013-01-09 22:10:50 +1100303// [%2d bits] ICANN bit
Nigel Tao0f34b772012-12-22 12:09:13 +1100304// [%2d bits] text index
305// [%2d bits] text length
306var nodes = [...]uint32{
307`,
Nigel Taob8ab5102013-01-09 22:10:50 +1100308 32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
309 nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
Nigel Tao67a30482012-12-12 15:58:52 +1100310 if err := n.walk(w, printNode); err != nil {
311 return err
312 }
Nigel Tao0f34b772012-12-22 12:09:13 +1100313 fmt.Fprintf(w, `}
314
Nigel Taob8ab5102013-01-09 22:10:50 +1100315// children is the list of nodes' children, the parent's wildcard bit and the
316// parent's node type. If a node has no children then their children index
317// will be in the range [0, 6), depending on the wildcard bit and node type.
Nigel Tao0f34b772012-12-22 12:09:13 +1100318//
319// The layout within the uint32, from MSB to LSB, is:
320// [%2d bits] unused
321// [%2d bits] wildcard bit
Nigel Taob8ab5102013-01-09 22:10:50 +1100322// [%2d bits] node type
Nigel Tao0f34b772012-12-22 12:09:13 +1100323// [%2d bits] high nodes index (exclusive) of children
324// [%2d bits] low nodes index (inclusive) of children
325var children=[...]uint32{
326`,
Nigel Taob8ab5102013-01-09 22:10:50 +1100327 32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
328 childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
Nigel Tao0f34b772012-12-22 12:09:13 +1100329 for i, c := range childrenEncoding {
330 s := "---------------"
331 lo := c & (1<<childrenBitsLo - 1)
332 hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
333 if lo != hi {
334 s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
335 }
Nigel Taob8ab5102013-01-09 22:10:50 +1100336 nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
337 wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
338 fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
339 c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
Nigel Tao0f34b772012-12-22 12:09:13 +1100340 }
Nigel Tao67a30482012-12-12 15:58:52 +1100341 fmt.Fprintf(w, "}\n")
342 return nil
343}
344
345type node struct {
346 label string
347 nodeType int
Nigel Taob8ab5102013-01-09 22:10:50 +1100348 icann bool
Nigel Tao67a30482012-12-12 15:58:52 +1100349 wildcard bool
Nigel Tao0f34b772012-12-22 12:09:13 +1100350 // nodesIndex and childrenIndex are the index of this node in the nodes
351 // and the index of its children offset/length in the children arrays.
352 nodesIndex, childrenIndex int
Nigel Tao67a30482012-12-12 15:58:52 +1100353 // firstChild is the index of this node's first child, or zero if this
354 // node has no children.
355 firstChild int
356 // children are the node's children, in strictly increasing node label order.
357 children []*node
358}
359
360func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
361 if err := f(w, n); err != nil {
362 return err
363 }
364 for _, c := range n.children {
365 if err := c.walk(w, f); err != nil {
366 return err
367 }
368 }
369 return nil
370}
371
372// child returns the child of n with the given label. The child is created if
373// it did not exist beforehand.
374func (n *node) child(label string) *node {
375 for _, c := range n.children {
376 if c.label == label {
377 return c
378 }
379 }
380 c := &node{
381 label: label,
382 nodeType: nodeTypeParentOnly,
Nigel Taob8ab5102013-01-09 22:10:50 +1100383 icann: true,
Nigel Tao67a30482012-12-12 15:58:52 +1100384 }
385 n.children = append(n.children, c)
386 sort.Sort(byLabel(n.children))
387 return c
388}
389
390type byLabel []*node
391
392func (b byLabel) Len() int { return len(b) }
393func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
394func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
395
Nigel Tao0f34b772012-12-22 12:09:13 +1100396var nextNodesIndex int
Nigel Tao67a30482012-12-12 15:58:52 +1100397
Nigel Taob8ab5102013-01-09 22:10:50 +1100398// childrenEncoding are the encoded entries in the generated children array.
399// All these pre-defined entries have no children.
Nigel Tao0f34b772012-12-22 12:09:13 +1100400var childrenEncoding = []uint32{
Nigel Taob8ab5102013-01-09 22:10:50 +1100401 0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
402 1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
403 2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
404 4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
405 5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
406 6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
Nigel Tao0f34b772012-12-22 12:09:13 +1100407}
408
409var firstCallToAssignIndexes = true
410
411func assignIndexes(w io.Writer, n *node) error {
Nigel Tao67a30482012-12-12 15:58:52 +1100412 if len(n.children) != 0 {
Nigel Tao0f34b772012-12-22 12:09:13 +1100413 // Assign nodesIndex.
414 n.firstChild = nextNodesIndex
Nigel Tao67a30482012-12-12 15:58:52 +1100415 for _, c := range n.children {
Nigel Tao0f34b772012-12-22 12:09:13 +1100416 c.nodesIndex = nextNodesIndex
417 nextNodesIndex++
Nigel Tao67a30482012-12-12 15:58:52 +1100418 }
Nigel Tao0f34b772012-12-22 12:09:13 +1100419
420 // The root node's children is implicit.
421 if firstCallToAssignIndexes {
422 firstCallToAssignIndexes = false
423 return nil
424 }
425
426 // Assign childrenIndex.
427 if len(childrenEncoding) >= 1<<nodesBitsChildren {
428 return fmt.Errorf("children table is too large")
429 }
430 n.childrenIndex = len(childrenEncoding)
431 lo := uint32(n.firstChild)
432 hi := lo + uint32(len(n.children))
433 if lo >= 1<<childrenBitsLo || hi >= 1<<childrenBitsHi {
434 return fmt.Errorf("children lo/hi is too large: %d/%d", lo, hi)
435 }
436 enc := hi<<childrenBitsLo | lo
Nigel Taob8ab5102013-01-09 22:10:50 +1100437 enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
Nigel Tao0f34b772012-12-22 12:09:13 +1100438 if n.wildcard {
Nigel Taob8ab5102013-01-09 22:10:50 +1100439 enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
Nigel Tao0f34b772012-12-22 12:09:13 +1100440 }
441 childrenEncoding = append(childrenEncoding, enc)
Nigel Taob8ab5102013-01-09 22:10:50 +1100442 } else {
443 n.childrenIndex = n.nodeType
444 if n.wildcard {
445 n.childrenIndex += numNodeType
446 }
Nigel Tao67a30482012-12-12 15:58:52 +1100447 }
448 return nil
449}
450
451func printNode(w io.Writer, n *node) error {
452 for _, c := range n.children {
Nigel Tao0f34b772012-12-22 12:09:13 +1100453 s := "---------------"
Nigel Tao67a30482012-12-12 15:58:52 +1100454 if len(c.children) != 0 {
Nigel Tao0f34b772012-12-22 12:09:13 +1100455 s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
Nigel Tao67a30482012-12-12 15:58:52 +1100456 }
Nigel Taob8ab5102013-01-09 22:10:50 +1100457 encoding := labelEncoding[c.label]
458 if c.icann {
459 encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
460 }
461 encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
462 fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
Nigel Tao0f34b772012-12-22 12:09:13 +1100463 encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
Nigel Taob8ab5102013-01-09 22:10:50 +1100464 nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
Nigel Tao67a30482012-12-12 15:58:52 +1100465 )
466 }
467 return nil
468}
469
470func printNodeLabel(w io.Writer, n *node) error {
471 for _, c := range n.children {
472 fmt.Fprintf(w, "%q,\n", c.label)
473 }
474 return nil
475}
476
Nigel Taob8ab5102013-01-09 22:10:50 +1100477func icannStr(icann bool) string {
478 if icann {
479 return "I"
480 }
481 return " "
482}
483
Nigel Tao0f34b772012-12-22 12:09:13 +1100484func wildcardStr(wildcard bool) string {
485 if wildcard {
486 return "*"
487 }
488 return " "
489}
490
Nigel Tao67a30482012-12-12 15:58:52 +1100491// makeText combines all the strings in labelsList to form one giant string.
492// If the crush flag is true, then overlapping strings will be merged: "arpa"
493// and "parliament" could yield "arparliament".
494func makeText() string {
495 if !*crush {
496 return strings.Join(labelsList, "")
497 }
498
499 beforeLength := 0
500 for _, s := range labelsList {
501 beforeLength += len(s)
502 }
503
504 // Make a copy of labelsList.
505 ss := append(make([]string, 0, len(labelsList)), labelsList...)
506
507 // Remove strings that are substrings of other strings.
508 for changed := true; changed; {
509 changed = false
510 for i, s := range ss {
511 if s == "" {
512 continue
513 }
514 for j, t := range ss {
515 if i != j && t != "" && strings.Contains(s, t) {
516 changed = true
517 ss[j] = ""
518 }
519 }
520 }
521 }
522
523 // Remove the empty strings.
524 sort.Strings(ss)
525 for len(ss) > 0 && ss[0] == "" {
526 ss = ss[1:]
527 }
528
529 // Join strings where one suffix matches another prefix.
530 for {
531 // Find best i, j, k such that ss[i][len-k:] == ss[j][:k],
532 // maximizing overlap length k.
533 besti := -1
534 bestj := -1
535 bestk := 0
536 for i, s := range ss {
537 if s == "" {
538 continue
539 }
540 for j, t := range ss {
541 if i == j {
542 continue
543 }
544 for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
545 if s[len(s)-k:] == t[:k] {
546 besti = i
547 bestj = j
548 bestk = k
549 }
550 }
551 }
552 }
553 if bestk > 0 {
554 if *v {
555 fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d) out of (%4d,%4d): %q and %q\n",
556 bestk, besti, bestj, len(ss), len(ss), ss[besti], ss[bestj])
557 }
558 ss[besti] += ss[bestj][bestk:]
559 ss[bestj] = ""
560 continue
561 }
562 break
563 }
564
565 text := strings.Join(ss, "")
566 if *v {
567 fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
568 }
569 return text
570}