unix: add tool for merging duplicate code

mkmerge.go parses generated code (z*_GOOS_GOARCH.go) and merges
duplicate consts, funcs, and types into one file per GOOS (z*_GOOS.go).

Updates golang/go#33059

Change-Id: I1439f260dc8c09e887e5917a3101c39b080f2882
Reviewed-on: https://go-review.googlesource.com/c/sys/+/221317
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/unix/README.md b/unix/README.md
index eb2f78a..ab433cc 100644
--- a/unix/README.md
+++ b/unix/README.md
@@ -149,6 +149,17 @@
 Then, edit the regex (if necessary) to match the desired constant. Avoid making
 the regex too broad to avoid matching unintended constants.
 
+### mkmerge.go
+
+This program is used to extract duplicate const, func, and type declarations
+from the generated architecture-specific files listed below, and merge these
+into a common file for each OS.
+
+The merge is performed in the following steps:
+1. Construct the set of common code that is identical in all architecture-specific files.
+2. Write this common code to the merged file.
+3. Remove the common code from all architecture-specific files.
+
 
 ## Generated files
 
diff --git a/unix/mkmerge.go b/unix/mkmerge.go
new file mode 100644
index 0000000..8bde450
--- /dev/null
+++ b/unix/mkmerge.go
@@ -0,0 +1,521 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// mkmerge.go parses generated source files and merges common
+// consts, funcs, and types into a common source file, per GOOS.
+//
+// Usage:
+//     $ go run mkmerge.go -out MERGED FILE [FILE ...]
+//
+// Example:
+//     # Remove all common consts, funcs, and types from zerrors_linux_*.go
+//     # and write the common code into zerrors_linux.go
+//     $ go run mkmerge.go -out zerrors_linux.go zerrors_linux_*.go
+//
+// mkmerge.go performs the merge in the following steps:
+// 1. Construct the set of common code that is identical in all
+//    architecture-specific files.
+// 2. Write this common code to the merged file.
+// 3. Remove the common code from all architecture-specific files.
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"flag"
+	"fmt"
+	"go/ast"
+	"go/format"
+	"go/parser"
+	"go/token"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// validGOOS lists the operating systems a merged file may target.
+const validGOOS = "aix|darwin|dragonfly|freebsd|linux|netbsd|openbsd|solaris"
+
+// getValidGOOS extracts the GOOS from filename. It reports the GOOS and
+// true when filename ends in "_GOOS.go" for one of the supported GOOS values.
+func getValidGOOS(filename string) (string, bool) {
+	re := regexp.MustCompile(`_(` + validGOOS + `)\.go$`)
+	m := re.FindStringSubmatch(filename)
+	if len(m) == 2 {
+		return m[1], true
+	}
+	return "", false
+}
+
+// codeElem represents an ast.Decl in a comparable way.
+type codeElem struct {
+	tok token.Token // e.g. token.CONST, token.TYPE, or token.FUNC
+	src string      // the declaration formatted as source code
+}
+
+// newCodeElem formats node as source code and wraps it, together with tok,
+// in a codeElem. An error is returned if node cannot be formatted.
+func newCodeElem(tok token.Token, node ast.Node) (codeElem, error) {
+	var buf bytes.Buffer
+	if err := format.Node(&buf, token.NewFileSet(), node); err != nil {
+		return codeElem{}, err
+	}
+	return codeElem{tok: tok, src: buf.String()}, nil
+}
+
+// codeSet is a set of codeElems
+type codeSet struct {
+	set map[codeElem]bool // true for all codeElems in the set
+}
+
+// newCodeSet returns an empty codeSet.
+func newCodeSet() *codeSet { return &codeSet{set: map[codeElem]bool{}} }
+
+// add inserts elem into the set.
+func (c *codeSet) add(elem codeElem) { c.set[elem] = true }
+
+// has reports whether elem is in the set.
+func (c *codeSet) has(elem codeElem) bool { return c.set[elem] }
+
+// isEmpty reports whether the set contains no elements.
+func (c *codeSet) isEmpty() bool { return len(c.set) == 0 }
+
+// intersection returns a new set holding exactly the elements present
+// in both c and a.
+func (c *codeSet) intersection(a *codeSet) *codeSet {
+	common := newCodeSet()
+
+	for elem := range c.set {
+		if a.has(elem) {
+			common.add(elem)
+		}
+	}
+	return common
+}
+
+// keepCommon is a filterFn used to build the merged file: it keeps only
+// the declarations that are common to every architecture.
+func (c *codeSet) keepCommon(elem codeElem) bool {
+	switch elem.tok {
+	case token.IMPORT:
+		// Imports are pruned separately by filterImports.
+		return true
+	case token.VAR:
+		// Vars never go into the merged file.
+		return false
+	case token.CONST, token.TYPE, token.FUNC, token.COMMENT:
+		// Keep consts, types, funcs, and file-level comments only when
+		// they appear in every architecture-specific file.
+		return c.has(elem)
+	}
+
+	log.Fatalf("keepCommon: invalid elem %v", elem)
+	return true
+}
+
+// keepArchSpecific is a filterFn for filtering the GOARCH-specific files:
+// it drops the consts, types, and funcs that were moved to the merged file
+// and keeps everything else (vars, imports, comments) untouched.
+func (c *codeSet) keepArchSpecific(elem codeElem) bool {
+	switch elem.tok {
+	case token.CONST, token.TYPE, token.FUNC:
+		// Remove common consts, types, or functions from the arch-specific file
+		return !c.has(elem)
+	}
+	return true
+}
+
+// srcFile represents a source file: its name and raw contents.
+type srcFile struct {
+	name string
+	src  []byte
+}
+
+// filterFn is the predicate type used by filter; it reports whether a
+// declaration (wrapped as a codeElem) should be kept in the output.
+type filterFn func(codeElem) bool
+
+// filter parses and filters Go source code from src, removing top
+// level declarations using keep as predicate.
+// It returns the resulting source after also removing empty lines inside
+// spec groups and imports that the filtering made unused.
+// For the src parameter, please see docs for parser.ParseFile.
+func filter(src interface{}, keep filterFn) ([]byte, error) {
+	// Parse the src into an ast
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+	if err != nil {
+		return nil, err
+	}
+	cmap := ast.NewCommentMap(fset, f, f.Comments)
+
+	// Group const/type specs on adjacent lines
+	var groups specGroups = make(map[string]int)
+	var groupID int
+
+	decls := f.Decls
+	f.Decls = f.Decls[:0]
+	for _, decl := range decls {
+		switch decl := decl.(type) {
+		case *ast.GenDecl:
+			// Filter imports, consts, types, vars
+			specs := decl.Specs
+			decl.Specs = decl.Specs[:0]
+			for i, spec := range specs {
+				elem, err := newCodeElem(decl.Tok, spec)
+				if err != nil {
+					return nil, err
+				}
+
+				// Create new group if there are empty lines between this and the previous spec
+				if i > 0 && fset.Position(specs[i-1].End()).Line < fset.Position(spec.Pos()).Line-1 {
+					groupID++
+				}
+
+				// Check if we should keep this spec
+				if keep(elem) {
+					decl.Specs = append(decl.Specs, spec)
+					// groups.add can fail if the spec cannot be re-formatted;
+					// this error was previously discarded, which would silently
+					// break empty-line filtering later on.
+					if err := groups.add(elem.src, groupID); err != nil {
+						return nil, err
+					}
+				}
+			}
+			// Check if we should keep this decl
+			if len(decl.Specs) > 0 {
+				f.Decls = append(f.Decls, decl)
+			}
+		case *ast.FuncDecl:
+			// Filter funcs
+			elem, err := newCodeElem(token.FUNC, decl)
+			if err != nil {
+				return nil, err
+			}
+			if keep(elem) {
+				f.Decls = append(f.Decls, decl)
+			}
+		}
+	}
+
+	// Filter file level comments
+	if cmap[f] != nil {
+		commentGroups := cmap[f]
+		cmap[f] = cmap[f][:0]
+		for _, cGrp := range commentGroups {
+			if keep(codeElem{token.COMMENT, cGrp.Text()}) {
+				cmap[f] = append(cmap[f], cGrp)
+			}
+		}
+	}
+	f.Comments = cmap.Filter(f).Comments()
+
+	// Generate code for the filtered ast
+	var buf bytes.Buffer
+	if err = format.Node(&buf, fset, f); err != nil {
+		return nil, err
+	}
+
+	// Remove empty lines left inside spec groups, then drop imports that
+	// are no longer referenced by the surviving declarations.
+	groupedSrc, err := groups.filterEmptyLines(&buf)
+	if err != nil {
+		return nil, err
+	}
+
+	return filterImports(groupedSrc)
+}
+
+// getCommonSet returns the set of consts, types, and funcs that appear in
+// every one of files. An error is returned if files is empty or any file
+// fails to parse.
+func getCommonSet(files []srcFile) (*codeSet, error) {
+	if len(files) == 0 {
+		return nil, fmt.Errorf("no files provided")
+	}
+
+	// Seed the result with the declarations of the first file, then
+	// intersect it with each remaining file so that only declarations
+	// present everywhere survive.
+	common, err := getCodeSet(files[0].src)
+	if err != nil {
+		return nil, err
+	}
+	for _, file := range files[1:] {
+		other, err := getCodeSet(file.src)
+		if err != nil {
+			return nil, err
+		}
+		common = common.intersection(other)
+	}
+	return common, nil
+}
+
+// getCodeSet returns the set of all top-level consts, types, and funcs from src.
+// File-level comments are also included, as token.COMMENT elements.
+// src must be string, []byte, or io.Reader (see go/parser.ParseFile docs)
+func getCodeSet(src interface{}) (*codeSet, error) {
+	set := newCodeSet()
+
+	fset := token.NewFileSet()
+	f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+	if err != nil {
+		return nil, err
+	}
+
+	for _, decl := range f.Decls {
+		switch decl := decl.(type) {
+		case *ast.GenDecl:
+			// Add const, and type declarations
+			if !(decl.Tok == token.CONST || decl.Tok == token.TYPE) {
+				break
+			}
+
+			// Each individual spec (not the whole parenthesized declaration)
+			// is one set element, so identical specs match regardless of how
+			// they are grouped in each file.
+			for _, spec := range decl.Specs {
+				elem, err := newCodeElem(decl.Tok, spec)
+				if err != nil {
+					return nil, err
+				}
+
+				set.add(elem)
+			}
+		case *ast.FuncDecl:
+			// Add func declarations
+			elem, err := newCodeElem(token.FUNC, decl)
+			if err != nil {
+				return nil, err
+			}
+
+			set.add(elem)
+		}
+	}
+
+	// Add file level comments
+	cmap := ast.NewCommentMap(fset, f, f.Comments)
+	for _, cGrp := range cmap[f] {
+		set.add(codeElem{token.COMMENT, cGrp.Text()})
+	}
+
+	return set, nil
+}
+
+// importName returns the identifier (PackageName) through which the file
+// refers to an imported package.
+func importName(iSpec *ast.ImportSpec) (string, error) {
+	if iSpec.Name != nil {
+		// An explicit local name (including "." and "_") takes precedence.
+		return iSpec.Name.Name, nil
+	}
+	// Otherwise the identifier is the last element of the import path.
+	pkgPath, err := strconv.Unquote(iSpec.Path.Value)
+	if err != nil {
+		return "", err
+	}
+	return path.Base(pkgPath), nil
+}
+
+// specGroups tracks grouped const/type specs with a map of
+// formatted-spec-source: groupID pairs
+type specGroups map[string]int
+
+// add records that the spec with source src belongs to group groupID.
+// The source is normalized through format.Source so that it can later be
+// matched against lines of re-formatted output.
+func (s specGroups) add(src string, groupID int) error {
+	srcBytes, err := format.Source(bytes.TrimSpace([]byte(src)))
+	if err != nil {
+		return err
+	}
+	s[string(srcBytes)] = groupID
+	return nil
+}
+
+// filterEmptyLines removes empty lines within groups of const/type specs.
+// Returns the filtered source.
+func (s specGroups) filterEmptyLines(src io.Reader) ([]byte, error) {
+	scanner := bufio.NewScanner(src)
+	var out bytes.Buffer
+
+	// Empty lines are buffered here and only flushed to out once the next
+	// non-empty line is known to start a different group.
+	var emptyLines bytes.Buffer
+	prevGroupID := -1 // Initialize to invalid group
+	for scanner.Scan() {
+		line := bytes.TrimSpace(scanner.Bytes())
+
+		if len(line) == 0 {
+			fmt.Fprintf(&emptyLines, "%s\n", scanner.Bytes())
+			continue
+		}
+
+		// Discard emptyLines if previous non-empty line belonged to the same
+		// group as this line
+		if src, err := format.Source(line); err == nil {
+			groupID, ok := s[string(src)]
+			if ok && groupID == prevGroupID {
+				emptyLines.Reset()
+			}
+			// NOTE(review): prevGroupID is overwritten even when ok is false,
+			// in which case groupID is the zero value 0 and may collide with
+			// the first group's ID. Presumably harmless for the generated
+			// input this tool processes — confirm before reusing elsewhere.
+			prevGroupID = groupID
+		}
+
+		emptyLines.WriteTo(&out)
+		fmt.Fprintf(&out, "%s\n", scanner.Bytes())
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+	return out.Bytes(), nil
+}
+
+// filterImports removes unused imports from fileSrc, and returns a formatted src.
+func filterImports(fileSrc []byte) ([]byte, error) {
+	fset := token.NewFileSet()
+	file, err := parser.ParseFile(fset, "", fileSrc, parser.ParseComments)
+	if err != nil {
+		return nil, err
+	}
+	cmap := ast.NewCommentMap(fset, file, file.Comments)
+
+	// create set of references to imported identifiers
+	// (file.Unresolved holds the identifiers not declared in this file,
+	// which includes every referenced package name)
+	keepImport := make(map[string]bool)
+	for _, u := range file.Unresolved {
+		keepImport[u.Name] = true
+	}
+
+	// filter import declarations
+	decls := file.Decls
+	file.Decls = file.Decls[:0]
+	for _, decl := range decls {
+		importDecl, ok := decl.(*ast.GenDecl)
+
+		// Keep non-import declarations
+		if !ok || importDecl.Tok != token.IMPORT {
+			file.Decls = append(file.Decls, decl)
+			continue
+		}
+
+		// Filter the import specs
+		specs := importDecl.Specs
+		importDecl.Specs = importDecl.Specs[:0]
+		for _, spec := range specs {
+			iSpec := spec.(*ast.ImportSpec)
+			name, err := importName(iSpec)
+			if err != nil {
+				return nil, err
+			}
+
+			if keepImport[name] {
+				importDecl.Specs = append(importDecl.Specs, iSpec)
+			}
+		}
+		// Drop the declaration entirely when no specs survived
+		if len(importDecl.Specs) > 0 {
+			file.Decls = append(file.Decls, importDecl)
+		}
+	}
+
+	// filter file.Imports so it stays consistent with the filtered decls
+	imports := file.Imports
+	file.Imports = file.Imports[:0]
+	for _, spec := range imports {
+		name, err := importName(spec)
+		if err != nil {
+			return nil, err
+		}
+
+		if keepImport[name] {
+			file.Imports = append(file.Imports, spec)
+		}
+	}
+	// Discard comments that were attached to removed nodes
+	file.Comments = cmap.Filter(file).Comments()
+
+	var buf bytes.Buffer
+	err = format.Node(&buf, fset, file)
+	if err != nil {
+		return nil, err
+	}
+
+	return buf.Bytes(), nil
+}
+
+// merge extracts duplicate code from archFiles and merges it to mergedFile.
+// 1. Construct commonSet: the set of code that is identical in all archFiles.
+// 2. Write the code in commonSet to mergedFile.
+// 3. Remove the commonSet code from all archFiles.
+func merge(mergedFile string, archFiles ...string) error {
+	// extract and validate the GOOS part of the merged filename
+	goos, ok := getValidGOOS(mergedFile)
+	if !ok {
+		return fmt.Errorf("invalid GOOS in merged file name %s", mergedFile)
+	}
+
+	// Read architecture files
+	var inSrc []srcFile
+	for _, file := range archFiles {
+		src, err := ioutil.ReadFile(file)
+		if err != nil {
+			return fmt.Errorf("cannot read archfile %s: %w", file, err)
+		}
+
+		inSrc = append(inSrc, srcFile{file, src})
+	}
+
+	// 1. Construct the set of top-level declarations common for all files
+	commonSet, err := getCommonSet(inSrc)
+	if err != nil {
+		return err
+	}
+	if commonSet.isEmpty() {
+		// No common code => do not modify any files
+		return nil
+	}
+
+	// 2. Write the merged file
+	mergedSrc, err := filter(inSrc[0].src, commonSet.keepCommon)
+	if err != nil {
+		return err
+	}
+
+	f, err := os.Create(mergedFile)
+	if err != nil {
+		return err
+	}
+
+	buf := bufio.NewWriter(f)
+	fmt.Fprintln(buf, "// Code generated by mkmerge.go; DO NOT EDIT.")
+	fmt.Fprintln(buf)
+	fmt.Fprintf(buf, "// +build %s\n", goos)
+	fmt.Fprintln(buf)
+	buf.Write(mergedSrc)
+
+	// Flush then close; previously a failed Flush returned early and
+	// leaked the open file handle.
+	if err := buf.Flush(); err != nil {
+		f.Close() // best effort; the Flush error takes precedence
+		return err
+	}
+	if err := f.Close(); err != nil {
+		return err
+	}
+
+	// 3. Remove duplicate declarations from the architecture files
+	for _, inFile := range inSrc {
+		src, err := filter(inFile.src, commonSet.keepArchSpecific)
+		if err != nil {
+			return err
+		}
+		err = ioutil.WriteFile(inFile.name, src, 0644)
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// main merges the architecture-specific files named on the command line
+// into the file given by -out.
+func main() {
+	var mergedFile string
+	flag.StringVar(&mergedFile, "out", "", "Write merged code to `FILE`")
+	flag.Parse()
+
+	// Expand wildcard arguments into the full list of input files
+	var files []string
+	for _, arg := range flag.Args() {
+		matches, err := filepath.Glob(arg)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "Invalid command line argument %q: %v\n", arg, err)
+			os.Exit(1)
+		}
+		files = append(files, matches...)
+	}
+
+	// With fewer than two inputs there is nothing to merge
+	if len(files) < 2 {
+		return
+	}
+
+	if err := merge(mergedFile, files...); err != nil {
+		fmt.Fprintf(os.Stderr, "Merge failed with error: %v\n", err)
+		os.Exit(1)
+	}
+}
diff --git a/unix/mkmerge_test.go b/unix/mkmerge_test.go
new file mode 100644
index 0000000..e628625
--- /dev/null
+++ b/unix/mkmerge_test.go
@@ -0,0 +1,505 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Test cases for mkmerge.go.
+// Usage:
+//     $ go test mkmerge.go mkmerge_test.go
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/parser"
+	"go/token"
+	"strings"
+	"testing"
+	"text/template"
+)
+
+func TestImports(t *testing.T) {
+	t.Run("importName", func(t *testing.T) {
+		cases := []struct {
+			src   string
+			ident string
+		}{
+			{`"syscall"`, "syscall"},
+			{`. "foobar"`, "."},
+			{`"go/ast"`, "ast"},
+			{`moo "go/format"`, "moo"},
+			{`. "go/token"`, "."},
+			{`"golang.org/x/sys/unix"`, "unix"},
+			{`nix "golang.org/x/sys/unix"`, "nix"},
+			{`_ "golang.org/x/sys/unix"`, "_"},
+		}
+
+		for _, c := range cases {
+			pkgSrc := fmt.Sprintf("package main\nimport %s", c.src)
+
+			f, err := parser.ParseFile(token.NewFileSet(), "", pkgSrc, parser.ImportsOnly)
+			if err != nil {
+				t.Error(err)
+				continue
+			}
+			if len(f.Imports) != 1 {
+				t.Errorf("Got %d imports, expected 1", len(f.Imports))
+				continue
+			}
+
+			got, err := importName(f.Imports[0])
+			if err != nil {
+				t.Fatal(err)
+			}
+			if got != c.ident {
+				t.Errorf("Got %q, expected %q", got, c.ident)
+			}
+		}
+	})
+
+	t.Run("filterImports", func(t *testing.T) {
+		cases := []struct{ before, after string }{
+			{`package test
+
+			import (
+				"foo"
+				"bar"
+			)`,
+				"package test\n"},
+			{`package test
+
+			import (
+				"foo"
+				"bar"
+			)
+
+			func useFoo() { foo.Usage() }`,
+				`package test
+
+import (
+	"foo"
+)
+
+func useFoo() { foo.Usage() }
+`},
+		}
+		for _, c := range cases {
+			got, err := filterImports([]byte(c.before))
+			if err != nil {
+				t.Error(err)
+			}
+
+			if string(got) != c.after {
+				t.Errorf("Got:\n%s\nExpected:\n%s\n", got, c.after)
+			}
+		}
+	})
+}
+
+func TestMerge(t *testing.T) {
+	// Input architecture files
+	inTmpl := template.Must(template.New("input").Parse(`
+// Package comments
+
+// build directives for arch{{.}}
+
+// +build goos,arch{{.}}
+
+package main
+
+/*
+#include <stdint.h>
+#include <stddef.h>
+int utimes(uintptr_t, uintptr_t);
+int utimensat(int, uintptr_t, uintptr_t, int);
+*/
+import "C"
+
+// The imports
+import (
+	"commonDep"
+	"uniqueDep{{.}}"
+)
+
+// Vars
+var (
+	commonVar = commonDep.Use("common")
+
+	uniqueVar{{.}} = "unique{{.}}"
+)
+
+// Common free standing comment
+
+// Common comment
+const COMMON_INDEPENDENT = 1234
+const UNIQUE_INDEPENDENT_{{.}} = "UNIQUE_INDEPENDENT_{{.}}"
+
+// Group comment
+const (
+	COMMON_GROUP = "COMMON_GROUP"
+	UNIQUE_GROUP_{{.}} = "UNIQUE_GROUP_{{.}}"
+)
+
+// Group2 comment
+const (
+	UNIQUE_GROUP21_{{.}} = "UNIQUE_GROUP21_{{.}}"
+	UNIQUE_GROUP22_{{.}} = "UNIQUE_GROUP22_{{.}}"
+)
+
+// Group3 comment
+const (
+	sub1Common1 = 11
+	sub1Unique2{{.}} = 12
+	sub1Common3_LONG = 13
+
+	sub2Unique1{{.}} = 21
+	sub2Common2 = 22
+	sub2Common3 = 23
+	sub2Unique4{{.}} = 24
+)
+
+type commonInt int
+
+type uniqueInt{{.}} int
+
+func commonF() string {
+	return commonDep.Use("common")
+	}
+
+func uniqueF() string {
+	C.utimes(0, 0)
+	return uniqueDep{{.}}.Use("{{.}}")
+	}
+
+// Group4 comment
+const (
+	sub3Common1 = 31
+	sub3Unique2{{.}} = 32
+	sub3Unique3{{.}} = 33
+	sub3Common4 = 34
+
+	sub4Common1, sub4Unique2{{.}} = 41, 42
+	sub4Unique3{{.}}, sub4Common4 = 43, 44
+)
+`))
+
+	// Filtered architecture files
+	outTmpl := template.Must(template.New("output").Parse(`// Package comments
+
+// build directives for arch{{.}}
+
+// +build goos,arch{{.}}
+
+package main
+
+/*
+#include <stdint.h>
+#include <stddef.h>
+int utimes(uintptr_t, uintptr_t);
+int utimensat(int, uintptr_t, uintptr_t, int);
+*/
+import "C"
+
+// The imports
+import (
+	"commonDep"
+	"uniqueDep{{.}}"
+)
+
+// Vars
+var (
+	commonVar = commonDep.Use("common")
+
+	uniqueVar{{.}} = "unique{{.}}"
+)
+
+const UNIQUE_INDEPENDENT_{{.}} = "UNIQUE_INDEPENDENT_{{.}}"
+
+// Group comment
+const (
+	UNIQUE_GROUP_{{.}} = "UNIQUE_GROUP_{{.}}"
+)
+
+// Group2 comment
+const (
+	UNIQUE_GROUP21_{{.}} = "UNIQUE_GROUP21_{{.}}"
+	UNIQUE_GROUP22_{{.}} = "UNIQUE_GROUP22_{{.}}"
+)
+
+// Group3 comment
+const (
+	sub1Unique2{{.}} = 12
+
+	sub2Unique1{{.}} = 21
+	sub2Unique4{{.}} = 24
+)
+
+type uniqueInt{{.}} int
+
+func uniqueF() string {
+	C.utimes(0, 0)
+	return uniqueDep{{.}}.Use("{{.}}")
+}
+
+// Group4 comment
+const (
+	sub3Unique2{{.}} = 32
+	sub3Unique3{{.}} = 33
+
+	sub4Common1, sub4Unique2{{.}} = 41, 42
+	sub4Unique3{{.}}, sub4Common4 = 43, 44
+)
+`))
+
+	const mergedFile = `// Package comments
+
+package main
+
+// The imports
+import (
+	"commonDep"
+)
+
+// Common free standing comment
+
+// Common comment
+const COMMON_INDEPENDENT = 1234
+
+// Group comment
+const (
+	COMMON_GROUP = "COMMON_GROUP"
+)
+
+// Group3 comment
+const (
+	sub1Common1      = 11
+	sub1Common3_LONG = 13
+
+	sub2Common2 = 22
+	sub2Common3 = 23
+)
+
+type commonInt int
+
+func commonF() string {
+	return commonDep.Use("common")
+}
+
+// Group4 comment
+const (
+	sub3Common1 = 31
+	sub3Common4 = 34
+)
+`
+
+	// Generate source code for different "architectures"
+	var inFiles, outFiles []srcFile
+	for _, arch := range strings.Fields("A B C D") {
+		buf := new(bytes.Buffer)
+		err := inTmpl.Execute(buf, arch)
+		if err != nil {
+			t.Fatal(err)
+		}
+		inFiles = append(inFiles, srcFile{"file" + arch, buf.Bytes()})
+
+		buf = new(bytes.Buffer)
+		err = outTmpl.Execute(buf, arch)
+		if err != nil {
+			t.Fatal(err)
+		}
+		outFiles = append(outFiles, srcFile{"file" + arch, buf.Bytes()})
+	}
+
+	t.Run("getCodeSet", func(t *testing.T) {
+		got, err := getCodeSet(inFiles[0].src)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		expectedElems := []codeElem{
+			{token.COMMENT, "Package comments\n"},
+			{token.COMMENT, "build directives for archA\n"},
+			{token.COMMENT, "+build goos,archA\n"},
+			{token.CONST, `COMMON_INDEPENDENT = 1234`},
+			{token.CONST, `UNIQUE_INDEPENDENT_A = "UNIQUE_INDEPENDENT_A"`},
+			{token.CONST, `COMMON_GROUP = "COMMON_GROUP"`},
+			{token.CONST, `UNIQUE_GROUP_A = "UNIQUE_GROUP_A"`},
+			{token.CONST, `UNIQUE_GROUP21_A = "UNIQUE_GROUP21_A"`},
+			{token.CONST, `UNIQUE_GROUP22_A = "UNIQUE_GROUP22_A"`},
+			{token.CONST, `sub1Common1 = 11`},
+			{token.CONST, `sub1Unique2A = 12`},
+			{token.CONST, `sub1Common3_LONG = 13`},
+			{token.CONST, `sub2Unique1A = 21`},
+			{token.CONST, `sub2Common2 = 22`},
+			{token.CONST, `sub2Common3 = 23`},
+			{token.CONST, `sub2Unique4A = 24`},
+			{token.CONST, `sub3Common1 = 31`},
+			{token.CONST, `sub3Unique2A = 32`},
+			{token.CONST, `sub3Unique3A = 33`},
+			{token.CONST, `sub3Common4 = 34`},
+			{token.CONST, `sub4Common1, sub4Unique2A = 41, 42`},
+			{token.CONST, `sub4Unique3A, sub4Common4 = 43, 44`},
+			{token.TYPE, `commonInt int`},
+			{token.TYPE, `uniqueIntA int`},
+			{token.FUNC, `func commonF() string {
+	return commonDep.Use("common")
+}`},
+			{token.FUNC, `func uniqueF() string {
+	C.utimes(0, 0)
+	return uniqueDepA.Use("A")
+}`},
+		}
+		expected := newCodeSet()
+		for _, d := range expectedElems {
+			expected.add(d)
+		}
+
+		if len(got.set) != len(expected.set) {
+			t.Errorf("Got %d codeElems, expected %d", len(got.set), len(expected.set))
+		}
+		for expElem := range expected.set {
+			if !got.has(expElem) {
+				t.Errorf("Didn't get expected codeElem %#v", expElem)
+			}
+		}
+		for gotElem := range got.set {
+			if !expected.has(gotElem) {
+				t.Errorf("Got unexpected codeElem %#v", gotElem)
+			}
+		}
+	})
+
+	t.Run("getCommonSet", func(t *testing.T) {
+		got, err := getCommonSet(inFiles)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		expected := newCodeSet()
+		expected.add(codeElem{token.COMMENT, "Package comments\n"})
+		expected.add(codeElem{token.CONST, `COMMON_INDEPENDENT = 1234`})
+		expected.add(codeElem{token.CONST, `COMMON_GROUP = "COMMON_GROUP"`})
+		expected.add(codeElem{token.CONST, `sub1Common1 = 11`})
+		expected.add(codeElem{token.CONST, `sub1Common3_LONG = 13`})
+		expected.add(codeElem{token.CONST, `sub2Common2 = 22`})
+		expected.add(codeElem{token.CONST, `sub2Common3 = 23`})
+		expected.add(codeElem{token.CONST, `sub3Common1 = 31`})
+		expected.add(codeElem{token.CONST, `sub3Common4 = 34`})
+		expected.add(codeElem{token.TYPE, `commonInt int`})
+		expected.add(codeElem{token.FUNC, `func commonF() string {
+	return commonDep.Use("common")
+}`})
+
+		if len(got.set) != len(expected.set) {
+			t.Errorf("Got %d codeElems, expected %d", len(got.set), len(expected.set))
+		}
+		for expElem := range expected.set {
+			if !got.has(expElem) {
+				t.Errorf("Didn't get expected codeElem %#v", expElem)
+			}
+		}
+		for gotElem := range got.set {
+			if !expected.has(gotElem) {
+				t.Errorf("Got unexpected codeElem %#v", gotElem)
+			}
+		}
+	})
+
+	t.Run("filter(keepCommon)", func(t *testing.T) {
+		commonSet, err := getCommonSet(inFiles)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		got, err := filter(inFiles[0].src, commonSet.keepCommon)
+		// This error was previously ignored; a filter failure must not fall
+		// through to the byte comparison with a nil result.
+		if err != nil {
+			t.Fatal(err)
+		}
+		expected := []byte(mergedFile)
+
+		if !bytes.Equal(got, expected) {
+			t.Errorf("Got:\n%s\nExpected:\n%s", addLineNr(got), addLineNr(expected))
+			diffLines(t, got, expected)
+		}
+	})
+
+	t.Run("filter(keepArchSpecific)", func(t *testing.T) {
+		commonSet, err := getCommonSet(inFiles)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		for i := range inFiles {
+			got, err := filter(inFiles[i].src, commonSet.keepArchSpecific)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			expected := outFiles[i].src
+
+			if !bytes.Equal(got, expected) {
+				t.Errorf("Got:\n%s\nExpected:\n%s", addLineNr(got), addLineNr(expected))
+				diffLines(t, got, expected)
+			}
+		}
+	})
+}
+
+// TestMergedName verifies that getValidGOOS accepts exactly the supported
+// "_GOOS.go" suffixes and rejects anything else (e.g. multics).
+func TestMergedName(t *testing.T) {
+	t.Run("getValidGOOS", func(t *testing.T) {
+		testcases := []struct {
+			filename, goos string
+			ok             bool
+		}{
+			{"zerrors_aix.go", "aix", true},
+			{"zerrors_darwin.go", "darwin", true},
+			{"zerrors_dragonfly.go", "dragonfly", true},
+			{"zerrors_freebsd.go", "freebsd", true},
+			{"zerrors_linux.go", "linux", true},
+			{"zerrors_netbsd.go", "netbsd", true},
+			{"zerrors_openbsd.go", "openbsd", true},
+			{"zerrors_solaris.go", "solaris", true},
+			{"zerrors_multics.go", "", false},
+		}
+		for _, tc := range testcases {
+			goos, ok := getValidGOOS(tc.filename)
+			if goos != tc.goos {
+				t.Errorf("got GOOS %q, expected %q", goos, tc.goos)
+			}
+			if ok != tc.ok {
+				t.Errorf("got ok %v, expected %v", ok, tc.ok)
+			}
+		}
+	})
+}
+
+// Helper functions to diff test sources
+
+// diffLines reports (via t.Errorf) the first line at which got and expected
+// differ, or a length mismatch when one is a strict prefix of the other.
+func diffLines(t *testing.T, got, expected []byte) {
+	t.Helper()
+
+	gotLines := bytes.Split(got, []byte{'\n'})
+	expLines := bytes.Split(expected, []byte{'\n'})
+
+	i := 0
+	for i < len(gotLines) && i < len(expLines) {
+		if !bytes.Equal(gotLines[i], expLines[i]) {
+			t.Errorf("Line %d: Got:\n%q\nExpected:\n%q", i+1, gotLines[i], expLines[i])
+			return
+		}
+		i++
+	}
+
+	if i < len(gotLines) && i >= len(expLines) {
+		t.Errorf("Line %d: got %q, expected EOF", i+1, gotLines[i])
+	}
+	if i >= len(gotLines) && i < len(expLines) {
+		// Previously indexed gotLines[i], which is out of range in this
+		// branch and panicked; report the remaining expected line instead.
+		t.Errorf("Line %d: got EOF, expected %q", i+1, expLines[i])
+	}
+}
+
+func addLineNr(src []byte) []byte {
+	lines := bytes.Split(src, []byte("\n"))
+	for i, line := range lines {
+		lines[i] = []byte(fmt.Sprintf("%d: %s", i+1, line))
+	}
+	return bytes.Join(lines, []byte("\n"))
+}