| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package main |
| |
| import ( |
| "archive/tar" |
| "archive/zip" |
| "bufio" |
| "bytes" |
| "compress/gzip" |
| "crypto/sha256" |
| "encoding/json" |
| "fmt" |
| "hash/crc32" |
| "io" |
| "io/fs" |
| "net/http" |
| "os" |
| "path/filepath" |
| "sort" |
| "strconv" |
| "strings" |
| ) |
| |
// SHA256 hashes data with SHA-256 and returns the digest
// formatted as lowercase hexadecimal.
func SHA256(data []byte) string {
	h := sha256.New()
	h.Write(data)
	return fmt.Sprintf("%x", h.Sum(nil))
}
| |
| // Get returns the content at the named URL. |
| func Get(log *Log, url string) (data []byte, err error) { |
| defer func() { |
| if err != nil && log != nil { |
| log.Printf("%s", err) |
| } |
| }() |
| |
| resp, err := http.Get(url) |
| if err != nil { |
| return nil, err |
| } |
| defer resp.Body.Close() |
| if resp.StatusCode != 200 { |
| return nil, fmt.Errorf("get %s: %s", url, resp.Status) |
| } |
| data, err = io.ReadAll(resp.Body) |
| if err != nil { |
| return nil, fmt.Errorf("get %s: %s", url, err) |
| } |
| if log != nil { |
| log.Printf("downloaded %s", url) |
| } |
| return data, nil |
| } |
| |
| // GerritTarGz returns a .tar.gz file corresponding to the named repo and ref on Go's Gerrit server. |
| func GerritTarGz(log *Log, repo, ref string) ([]byte, error) { |
| return Get(log, "https://go.googlesource.com/"+repo+"/+archive/"+ref+".tar.gz") |
| } |
| |
| // A DLRelease is the JSON for a release, returned by go.dev/dl. |
| type DLRelease struct { |
| Version string `json:"version"` |
| Stable bool `json:"stable"` |
| Files []*DLFile `json:"files"` |
| } |
| |
// A DLFile is one file entry inside a DLRelease,
// as it appears in the JSON returned by go.dev/dl.
type DLFile struct {
	Name    string `json:"filename"` // decoded from the "filename" key
	GOOS    string `json:"os"`       // decoded from the "os" key
	GOARCH  string `json:"arch"`     // decoded from the "arch" key; DLReleases rewrites "armv6l" to "arm"
	Version string `json:"version"`  // decoded from the "version" key
	SHA256  string `json:"sha256"`   // decoded from the "sha256" key
	Size    int64  `json:"size"`     // decoded from the "size" key
	Kind    string `json:"kind"`     // "archive", "installer", "source"
}
| |
| // DLReleases returns the release list from go.dev/dl. |
| func DLReleases(log *Log) ([]*DLRelease, error) { |
| var all []*DLRelease |
| data, err := Get(log, "https://go.dev/dl/?mode=json&include=all") |
| if err != nil { |
| return nil, err |
| } |
| if err := json.Unmarshal(data, &all); err != nil { |
| return nil, fmt.Errorf("unmarshaling releases JSON: %v", err) |
| } |
| |
| for _, r := range all { |
| for _, f := range r.Files { |
| if f.GOARCH == "armv6l" { |
| f.GOARCH = "arm" |
| } |
| } |
| } |
| return all, nil |
| } |
| |
// OpenTarGz wraps the gzip-compressed tar data in tgz with a tar.Reader.
// It fails if the gzip header cannot be read; the tar stream itself
// is only parsed as the returned reader is consumed.
func OpenTarGz(tgz []byte) (*tar.Reader, error) {
	gz, err := gzip.NewReader(bytes.NewReader(tgz))
	if err == nil {
		return tar.NewReader(gz), nil
	}
	return nil, err
}
| |
| // UnpackTarGz unpacks the given tgz data into the named directory. |
| // On error the directory may contain partial contents. |
| func UnpackTarGz(dir string, tgz []byte) error { |
| if err := os.MkdirAll(dir, 0777); err != nil { |
| return err |
| } |
| tr, err := OpenTarGz(tgz) |
| if err != nil { |
| return err |
| } |
| for { |
| hdr, err := tr.Next() |
| if err != nil { |
| if err == io.EOF { |
| break |
| } |
| return err |
| } |
| if hdr.Typeflag == tar.TypeDir { |
| // Ignore directories entirely |
| continue |
| } |
| name := filepath.FromSlash(hdr.Name) |
| if name != filepath.Clean(name) || strings.HasPrefix(name, "..") || filepath.IsAbs(name) { |
| return fmt.Errorf("invalid name in tgz: %#q", hdr.Name) |
| } |
| targ := filepath.Join(dir, name) |
| if err := os.MkdirAll(filepath.Dir(targ), 0777); err != nil { |
| return err |
| } |
| f, err := os.OpenFile(targ, os.O_CREATE|os.O_WRONLY, fs.FileMode(hdr.Mode&0777)) |
| if err != nil { |
| return err |
| } |
| if _, err := io.Copy(f, tr); err != nil { |
| f.Close() |
| return err |
| } |
| if err := f.Close(); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
// OpenZip parses zipdata as a zip archive held entirely in memory
// and returns a zip.Reader over it.
func OpenZip(zipdata []byte) (*zip.Reader, error) {
	src := bytes.NewReader(zipdata)
	return zip.NewReader(src, src.Size())
}
| |
| // UnpackZip unpacks the given zip data into the named directory. |
| // On error the directory may contain partial contents. |
| func UnpackZip(dir string, zipdata []byte) error { |
| if err := os.MkdirAll(dir, 0777); err != nil { |
| return err |
| } |
| zr, err := OpenZip(zipdata) |
| if err != nil { |
| return err |
| } |
| for _, zf := range zr.File { |
| if strings.HasSuffix(zf.Name, "/") { |
| // Ignore directories entirely |
| continue |
| } |
| name := filepath.FromSlash(zf.Name) |
| if name != filepath.Clean(name) || strings.HasPrefix(name, "..") || filepath.IsAbs(name) { |
| return fmt.Errorf("invalid name in zip: %#q", zf.Name) |
| } |
| targ := filepath.Join(dir, name) |
| if err := os.MkdirAll(filepath.Dir(targ), 0777); err != nil { |
| return err |
| } |
| f, err := os.OpenFile(targ, os.O_CREATE|os.O_WRONLY, 0666) |
| if err != nil { |
| return err |
| } |
| zr, err := zf.Open() |
| if err != nil { |
| f.Close() |
| return err |
| } |
| _, err = io.Copy(f, zr) |
| zr.Close() |
| if err != nil { |
| f.Close() |
| return err |
| } |
| if err := f.Close(); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| // A Fixer is a transformation on file content applied during indexing. |
| // It lets us edit away permitted differences between files, such as code |
| // signatures that cannot be reproduced without the signing keys. |
| type Fixer = func(*Log, string, []byte) []byte |
| |
// A TarFile is the index entry for one file in a tar archive.
// It keeps the file's tar header and the SHA256 of its content
// in place of the content itself.
type TarFile struct {
	tar.Header        // header of the entry
	SHA256     string // hexadecimal SHA256 of the content
}
| |
// A ZipFile is the index entry for one file in a zip archive.
// It keeps the file's zip header and the SHA256 of its content
// in place of the content itself.
type ZipFile struct {
	zip.FileHeader        // header of the entry
	SHA256         string // hexadecimal SHA256 of the content
}
| |
// A CpioFile is the index entry for one file in a CPIO archive.
type CpioFile struct {
	Name   string      // name of the entry inside the archive
	Mode   fs.FileMode // mode of the entry
	Size   int64       // length of the content in bytes
	SHA256 string      // hexadecimal SHA256 of the content
}
| |
| // IndexTarGz parses tgz as a gzip-compressed tar file and returns an index of its content. |
| // If fix is non-nil, it is applied to file content before indexing. |
| // This lets us strip code signatures that cannot be reproduced. |
| func IndexTarGz(log *Log, tgz []byte, fix Fixer) map[string]*TarFile { |
| tr, err := OpenTarGz(tgz) |
| if err != nil { |
| log.Printf("%v", err) |
| return nil |
| } |
| ix := make(map[string]*TarFile) |
| for { |
| hdr, err := tr.Next() |
| if err != nil { |
| if err == io.EOF { |
| break |
| } |
| log.Printf("reading tgz: %v", err) |
| return nil |
| } |
| if hdr.Typeflag == tar.TypeDir { |
| // Ignore directories entirely |
| continue |
| } |
| data, err := io.ReadAll(tr) |
| if err != nil { |
| log.Printf("reading %s from tgz: %v", hdr.Name, err) |
| return nil |
| } |
| if fix != nil { |
| data = fix(log, hdr.Name, data) |
| hdr.Size = int64(len(data)) |
| } |
| ix[hdr.Name] = &TarFile{*hdr, SHA256(data)} |
| } |
| return ix |
| } |
| |
| // IndexZip parses zipdata as a zip archive and returns an index of its content. |
| // If fix is non-nil, it is applied to file content before indexing. |
| // This lets us strip code signatures that cannot be reproduced. |
| func IndexZip(log *Log, zipdata []byte, fix Fixer) map[string]*ZipFile { |
| zr, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata))) |
| if err != nil { |
| log.Printf("%v", err) |
| return nil |
| } |
| ix := make(map[string]*ZipFile) |
| for _, hdr := range zr.File { |
| if strings.HasSuffix(hdr.Name, "/") { |
| // Ignore directories entirely |
| continue |
| } |
| rc, err := hdr.Open() |
| if err != nil { |
| log.Printf("%v", err) |
| return nil |
| } |
| data, err := io.ReadAll(rc) |
| rc.Close() |
| if err != nil { |
| log.Printf("%v", err) |
| return nil |
| } |
| if fix != nil { |
| data = fix(log, hdr.Name, data) |
| hdr.CRC32 = crc32.ChecksumIEEE(data) |
| hdr.UncompressedSize = uint32(len(data)) |
| hdr.UncompressedSize64 = uint64(len(data)) |
| } |
| ix[hdr.Name] = &ZipFile{hdr.FileHeader, SHA256(data)} |
| } |
| return ix |
| } |
| |
| // IndexCpioGz parses data as a gzip-compressed cpio file and returns an index of its content. |
| // If fix is non-nil, it is applied to file content before indexing. |
| // This lets us strip code signatures that cannot be reproduced. |
| func IndexCpioGz(log *Log, data []byte, fix Fixer) map[string]*CpioFile { |
| zr, err := gzip.NewReader(bytes.NewReader(data)) |
| if err != nil { |
| log.Printf("%v", err) |
| return nil |
| } |
| br := bufio.NewReader(zr) |
| |
| const hdrSize = 76 |
| |
| ix := make(map[string]*CpioFile) |
| hdr := make([]byte, hdrSize) |
| for { |
| _, err := io.ReadFull(br, hdr) |
| if err != nil { |
| if err == io.EOF { |
| break |
| } |
| log.Printf("reading archive: %v", err) |
| return nil |
| } |
| |
| // https://www.mkssoftware.com/docs/man4/cpio.4.asp |
| // |
| // hdr[0:6] "070707" |
| // hdr[6:12] device number (all numbers '0'-padded octal) |
| // hdr[12:18] inode number |
| // hdr[18:24] mode |
| // hdr[24:30] uid |
| // hdr[30:36] gid |
| // hdr[36:42] nlink |
| // hdr[42:48] rdev |
| // hdr[48:59] mtime |
| // hdr[59:65] name length |
| // hdr[65:76] file size |
| |
| if !allOctal(hdr[:]) || string(hdr[:6]) != "070707" { |
| log.Printf("reading archive: malformed entry") |
| return nil |
| } |
| mode, _ := strconv.ParseInt(string(hdr[18:24]), 8, 64) |
| nameLen, _ := strconv.ParseInt(string(hdr[59:65]), 8, 64) |
| size, _ := strconv.ParseInt(string(hdr[65:76]), 8, 64) |
| nameBuf := make([]byte, nameLen) |
| if _, err := io.ReadFull(br, nameBuf); err != nil { |
| log.Printf("reading archive: %v", err) |
| return nil |
| } |
| if nameLen == 0 || nameBuf[nameLen-1] != 0 { |
| log.Printf("reading archive: malformed entry") |
| return nil |
| } |
| name := string(nameBuf[:nameLen-1]) |
| |
| // The MKS cpio page says "TRAILER!!" |
| // but the Apple pkg files use "TRAILER!!!". |
| if name == "TRAILER!!!" { |
| break |
| } |
| |
| fmode := fs.FileMode(mode & 0777) |
| if mode&040000 != 0 { |
| fmode |= fs.ModeDir |
| } |
| |
| data, err := io.ReadAll(io.LimitReader(br, size)) |
| if err != nil { |
| log.Printf("reading archive: %v", err) |
| return nil |
| } |
| if size != int64(len(data)) { |
| log.Printf("reading archive: short file") |
| return nil |
| } |
| |
| if fmode&fs.ModeDir != 0 { |
| continue |
| } |
| |
| if fix != nil { |
| data = fix(log, name, data) |
| size = int64(len(data)) |
| } |
| ix[name] = &CpioFile{name, fmode, size, SHA256(data)} |
| } |
| return ix |
| } |
| |
// allOctal reports whether every byte of x is one of the ASCII digits
// '0' through '7'. It reports true for an empty x.
func allOctal(x []byte) bool {
	for i := 0; i < len(x); i++ {
		if c := x[i]; c < '0' || c > '7' {
			return false
		}
	}
	return true
}
| |
| // DiffArchive diffs the archives 'rebuild' and 'posted' based on their indexes. |
| // It reports to log any files that appear only in one or the other. |
| // For files that appear in both, DiffArchive calls check, which should |
| // log any differences found and report whether the files match. |
| // It reports whether the archives match. |
| // If either of rebuild or posted is nil, DiffArchive returns false without logging, |
| // assuming that the code that returned the nil archive took care of reporting the problem. |
| func DiffArchive[File1, File2 any](log *Log, |
| rebuilt map[string]File1, posted map[string]File2, |
| check func(*Log, File1, File2) bool) bool { |
| |
| if rebuilt == nil || posted == nil { |
| return false |
| } |
| |
| // Build list of all names; will have duplicates. |
| var names []string |
| for name := range rebuilt { |
| names = append(names, name) |
| } |
| for name := range posted { |
| names = append(names, name) |
| } |
| sort.Strings(names) |
| |
| match := true |
| for _, name := range names { |
| fr, okr := rebuilt[name] |
| fp, okp := posted[name] |
| if !okr && !okp { // duplicate name |
| continue |
| } |
| if !okr { |
| log.Printf("%s: missing from posted archive", name) |
| match = false |
| continue |
| } |
| if !okr { |
| log.Printf("%s: unexpected file in posted archive", name) |
| match = false |
| continue |
| } |
| delete(rebuilt, name) |
| delete(posted, name) |
| |
| if !check(log, fr, fp) { |
| match = false |
| } |
| } |
| return match |
| } |
| |
| // DiffTarGz diffs the tgz files rebuilt and posted, reporting any differences to log |
| // and applying fix to files before comparing them. |
| // It reports whether the archives match. |
| func DiffTarGz(log *Log, rebuilt, posted []byte, fix Fixer) bool { |
| n := 0 |
| check := func(log *Log, rebuilt, posted *TarFile) bool { |
| match := true |
| name := rebuilt.Name |
| field := func(what string, rebuilt, posted any) { |
| if posted != rebuilt { |
| if n++; n <= 100 { |
| log.Printf("%s: rebuilt %s = %v, posted = %v", name, what, rebuilt, posted) |
| } else if n == 101 { |
| log.Printf("eliding additional diffs ...") |
| } |
| match = false |
| } |
| } |
| r := rebuilt |
| p := posted |
| field("typeflag", r.Typeflag, p.Typeflag) |
| field("linkname", r.Linkname, p.Linkname) |
| field("mode", r.Mode, p.Mode) |
| field("uid", r.Uid, p.Uid) |
| field("gid", r.Gid, p.Gid) |
| field("uname", r.Uname, p.Uname) |
| field("gname", r.Gname, p.Gname) |
| field("mtime", r.ModTime, p.ModTime) |
| field("atime", r.AccessTime, p.AccessTime) |
| field("ctime", r.ChangeTime, p.ChangeTime) |
| field("devmajor", r.Devmajor, p.Devmajor) |
| field("devminor", r.Devminor, p.Devminor) |
| for k, vhdr := range r.PAXRecords { |
| field("PAX:"+k, vhdr, p.PAXRecords[k]) |
| } |
| for k, vf := range p.PAXRecords { |
| if vhdr, ok := r.PAXRecords[k]; !ok { |
| field("PAX:"+k, vhdr, vf) |
| } |
| } |
| field("format", r.Format, p.Format) |
| field("size", r.Size, p.Size) |
| field("content", r.SHA256, p.SHA256) |
| return match |
| } |
| |
| return DiffArchive(log, IndexTarGz(log, rebuilt, fix), IndexTarGz(log, posted, fix), check) |
| } |
| |
| // DiffZip diffs the zip files rebuilt and posted, reporting any differences to log |
| // and applying fix to files before comparing them. |
| // It reports whether the archives match. |
| func DiffZip(log *Log, rebuilt, posted []byte, fix Fixer) bool { |
| n := 0 |
| check := func(log *Log, rebuilt, posted *ZipFile) bool { |
| match := true |
| name := rebuilt.Name |
| field := func(what string, rebuilt, posted any) { |
| if posted != rebuilt { |
| if n++; n <= 100 { |
| log.Printf("%s: rebuilt %s = %v, posted = %v", name, what, rebuilt, posted) |
| } else if n == 101 { |
| log.Printf("eliding additional diffs ...") |
| } |
| match = false |
| } |
| } |
| r := rebuilt |
| p := posted |
| |
| field("comment", r.Comment, p.Comment) |
| field("nonutf8", r.NonUTF8, p.NonUTF8) |
| field("creatorversion", r.CreatorVersion, p.CreatorVersion) |
| field("readerversion", r.ReaderVersion, p.ReaderVersion) |
| field("flags", r.Flags, p.Flags) |
| field("method", r.Method, p.Method) |
| // Older versions of Go produce unequal Modified times in archive/zip, |
| // presumably due to some kind of archive/zip parsing error, |
| // or perhaps due to the Extra field being doubled below. |
| // The problem does not happen with Go 1.20. |
| // To allow people to use older Go versions to run gorebuild, |
| // we only check the actual time instant, not the location, in Modified. |
| field("modifiedUnix", r.Modified.UnixNano(), p.Modified.UnixNano()) |
| field("mtime", r.ModifiedTime, p.ModifiedTime) |
| field("mdate", r.ModifiedDate, p.ModifiedDate) |
| if len(p.Extra) == 2*len(r.Extra) && string(p.Extra) == string(r.Extra)+string(r.Extra) { |
| // Mac signing rewrites the zip file, which ends up doubling |
| // the Extra field due to go.dev/issue/61572. |
| // Allow that. |
| } else { |
| field("extra", fmt.Sprintf("%x", r.Extra), fmt.Sprintf("%x", p.Extra)) |
| } |
| field("crc32", r.CRC32, p.CRC32) |
| field("xattrs", r.ExternalAttrs, p.ExternalAttrs) |
| field("usize32", r.UncompressedSize, p.UncompressedSize) |
| field("usize64", r.UncompressedSize64, p.UncompressedSize64) |
| field("content", r.SHA256, p.SHA256) |
| return match |
| } |
| |
| return DiffArchive(log, IndexZip(log, rebuilt, fix), IndexZip(log, posted, fix), check) |
| } |