internal/gitfs: add a generated copy of x/website/internal/gitfs
It's generated rather than copied manually, so that it is clear its
canonical source still lives at golang.org/x/website/internal/gitfs.
For golang/go#68873.
Change-Id: I2ec03384666d5c230e59c36db118e7f3969a8e11
Reviewed-on: https://go-review.googlesource.com/c/build/+/617777
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Auto-Submit: Dmitri Shuralyov <dmitshur@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/gitfs/doc.go b/internal/gitfs/doc.go
new file mode 100644
index 0000000..36b2cc6
--- /dev/null
+++ b/internal/gitfs/doc.go
@@ -0,0 +1,6 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package gitfs presents a file tree downloaded from a remote Git repo as an in-memory fs.FS.
+package gitfs
diff --git a/internal/gitfs/gitfs.go b/internal/gitfs/gitfs.go
new file mode 100644
index 0000000..9b9e031
--- /dev/null
+++ b/internal/gitfs/gitfs.go
@@ -0,0 +1,1090 @@
+// Code generated by golang.org/x/tools/cmd/bundle. DO NOT EDIT.
+//go:generate bundle -o gitfs.go -prefix= golang.org/x/website/internal/gitfs
+
+// Package gitfs presents a file tree downloaded from a remote Git repo as an in-memory fs.FS.
+//
+
+package gitfs
+
+import (
+ "bufio"
+ "bytes"
+ "compress/zlib"
+ "crypto/sha1"
+ "encoding/binary"
+ "encoding/hex"
+ "fmt"
+ hashpkg "hash"
+ "io"
+ "io/fs"
+ "net/http"
+ "runtime/debug"
+ "strconv"
+ "strings"
+ "time"
+)
+
+// A Hash is a SHA-1 Hash identifying a particular Git object.
+type Hash [20]byte
+
+func (h Hash) String() string { return fmt.Sprintf("%x", h[:]) }
+
+// parseHash parses the (full-length) Git hash text.
+func parseHash(text string) (Hash, error) {
+ x, err := hex.DecodeString(text)
+ if err != nil || len(x) != 20 {
+ return Hash{}, fmt.Errorf("invalid hash")
+ }
+ var h Hash
+ copy(h[:], x)
+ return h, nil
+}
+
+// An objType is an object type indicator.
+// The values are the ones used in Git pack encoding
+// (https://git-scm.com/docs/pack-format#_object_types).
+type objType int
+
+const (
+ objNone objType = 0
+ objCommit objType = 1
+ objTree objType = 2
+ objBlob objType = 3
+ objTag objType = 4
+ // 5 undefined
+ objOfsDelta objType = 6
+ objRefDelta objType = 7
+)
+
+var objTypes = [...]string{
+ objCommit: "commit",
+ objTree: "tree",
+ objBlob: "blob",
+ objTag: "tag",
+}
+
+func (t objType) String() string {
+ if t < 0 || int(t) >= len(objTypes) || objTypes[t] == "" {
+ return fmt.Sprintf("objType(%d)", int(t))
+ }
+ return objTypes[t]
+}
+
+// A dirEntry is a Git directory entry parsed from a tree object.
+type dirEntry struct {
+ mode int
+ name []byte
+ hash Hash
+}
+
+// parseDirEntry parses the next directory entry from data,
+// returning the entry and the number of bytes it occupied.
+// If data is malformed, parseDirEntry returns dirEntry{}, 0.
+func parseDirEntry(data []byte) (dirEntry, int) {
+ // Unclear where or if this format is documented by Git.
+ // Each directory entry is an octal mode, then a space,
+ // then a file name, then a NUL byte, then a 20-byte binary hash.
+ // Note that 'git cat-file -p <treehash>' shows a textual representation
+ // of this data, not the actual binary data. To see the binary data,
+ // use 'echo <treehash> | git cat-file --batch | hexdump -C'.
+ mode := 0
+ i := 0
+ for i < len(data) && data[i] != ' ' {
+ c := data[i]
+ if c < '0' || '7' < c {
+ return dirEntry{}, 0
+ }
+ mode = mode*8 + int(c) - '0'
+ i++
+ }
+ i++
+ j := i
+ for j < len(data) && data[j] != 0 {
+ j++
+ }
+ if len(data)-j < 1+20 {
+ return dirEntry{}, 0
+ }
+ name := data[i:j]
+ var h Hash
+ copy(h[:], data[j+1:])
+ return dirEntry{mode, name, h}, j + 1 + 20
+}
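+
+// exampleDirEntry is an illustrative sketch, not part of the bundled
+// x/website code: it decodes a hand-built tree entry (made-up mode and
+// name, all-zero hash) to show the raw layout described above.
+func exampleDirEntry() {
+	raw := append([]byte("100644 README\x00"), make([]byte, 20)...)
+	e, n := parseDirEntry(raw)
+	fmt.Printf("mode=%o name=%s entry=%d bytes\n", e.mode, e.name, n)
+	// Output: mode=100644 name=README entry=34 bytes
+}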
+
+// treeLookup looks in the tree object data for the directory entry with the given name,
+// returning the mode and hash associated with the name.
+func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) {
+ // Note: The tree object directory entries are sorted by name,
+ // but the directory entry data is not self-synchronizing,
+ // so it's not possible to be clever and use a binary search here.
+ for len(data) > 0 {
+ e, size := parseDirEntry(data)
+ if size == 0 {
+ break
+ }
+ if string(e.name) == name {
+ return e.mode, e.hash, true
+ }
+ data = data[size:]
+ }
+ return 0, Hash{}, false
+}
+
+// commitKeyValue parses the commit object data
+// looking for the first header line "key: value" matching the given key.
+// It returns the associated value.
+// (Try 'git cat-file -p <commithash>' to see the commit data format.)
+func commitKeyValue(data []byte, key string) ([]byte, bool) {
+ for i := 0; i < len(data); i++ {
+ if i == 0 || data[i-1] == '\n' {
+ if data[i] == '\n' {
+ break
+ }
+			if len(data)-i >= len(key)+1 && data[i+len(key)] == ' ' && string(data[i:i+len(key)]) == key {
+				val := data[i+len(key)+1:]
+ for j := 0; j < len(val); j++ {
+ if val[j] == '\n' {
+ val = val[:j]
+ break
+ }
+ }
+ return val, true
+ }
+ }
+ }
+ return nil, false
+}
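+
+// exampleCommitKeyValue is an illustrative sketch, not part of the
+// bundled x/website code: it extracts the "tree" header from a minimal
+// commit body whose hash and author are placeholders.
+func exampleCommitKeyValue() {
+	commit := []byte("tree 0123456789abcdef0123456789abcdef01234567\n" +
+		"author A U Thor <author@example.com> 0 +0000\n" +
+		"\n" +
+		"commit message\n")
+	tree, ok := commitKeyValue(commit, "tree")
+	fmt.Println(string(tree), ok)
+	// Output: 0123456789abcdef0123456789abcdef01234567 true
+}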
+
+// A store is a collection of Git objects, indexed for lookup by hash.
+type store struct {
+ sha1 hashpkg.Hash // reused hash state
+ index map[Hash]stored // lookup index
+ data []byte // concatenation of all object data
+}
+
+// A stored describes a single stored object.
+type stored struct {
+ typ objType // object type
+ off int // object data is store.data[off:off+len]
+ len int
+}
+
+// add adds an object with the given type and content to s, returning its Hash and stored data.
+// If the object is already stored in s, add succeeds but doesn't store a second copy.
+func (s *store) add(typ objType, data []byte) (Hash, []byte) {
+ if s.sha1 == nil {
+ s.sha1 = sha1.New()
+ }
+
+ // Compute Git hash for object.
+ s.sha1.Reset()
+ fmt.Fprintf(s.sha1, "%s %d\x00", typ, len(data))
+ s.sha1.Write(data)
+ var h Hash
+ s.sha1.Sum(h[:0]) // appends into h
+
+ e, ok := s.index[h]
+ if !ok {
+ if s.index == nil {
+ s.index = make(map[Hash]stored)
+ }
+ e = stored{typ, len(s.data), len(data)}
+ s.index[h] = e
+ s.data = append(s.data, data...)
+ }
+ return h, s.data[e.off : e.off+e.len]
+}
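+
+// exampleStoreAdd is an illustrative sketch, not part of the bundled
+// x/website code: add hashes "<type> <len>\x00<data>" with SHA-1, the
+// same scheme as git hash-object, so an empty blob gets Git's
+// well-known empty-blob hash.
+func exampleStoreAdd() {
+	var s store
+	h, _ := s.add(objBlob, nil)
+	fmt.Println(h)
+	// Output: e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+}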
+
+// object returns the type and data for the object with hash h.
+// If there is no object with hash h, object returns 0, nil.
+func (s *store) object(h Hash) (typ objType, data []byte) {
+ d, ok := s.index[h]
+ if !ok {
+ return 0, nil
+ }
+ return d.typ, s.data[d.off : d.off+d.len]
+}
+
+// commit returns a treeFS for the file system tree associated with the given commit hash.
+func (s *store) commit(h Hash) (*treeFS, error) {
+ // The commit object data starts with key-value pairs
+	// The commit object data starts with key-value header lines; the "tree" key names the root tree object.
+ if typ == objNone {
+ return nil, fmt.Errorf("commit %s: no such hash", h)
+ }
+ if typ != objCommit {
+ return nil, fmt.Errorf("commit %s: unexpected type %s", h, typ)
+ }
+ treeHash, ok := commitKeyValue(data, "tree")
+ if !ok {
+ return nil, fmt.Errorf("commit %s: no tree", h)
+ }
+	th, err := parseHash(string(treeHash))
+	if err != nil {
+		return nil, fmt.Errorf("commit %s: invalid tree %q", h, treeHash)
+	}
+	return &treeFS{s, th}, nil
+}
+
+// A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash.
+type treeFS struct {
+ s *store
+ tree Hash // root tree
+}
+
+// Open opens the given file or directory, implementing the fs.FS Open method.
+func (t *treeFS) Open(name string) (f fs.File, err error) {
+ defer func() {
+ if e := recover(); e != nil {
+ f = nil
+ err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack())
+ }
+ }()
+
+ // Process each element in the slash-separated path, producing hash identified by name.
+ h := t.tree
+ start := 0 // index of start of final path element in name
+ if name != "." {
+ for i := 0; i <= len(name); i++ {
+ if i == len(name) || name[i] == '/' {
+ // Look up name in current tree object h.
+ typ, data := t.s.object(h)
+ if typ != objTree {
+ return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist}
+ }
+ _, th, ok := treeLookup(data, name[start:i])
+ if !ok {
+ return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist}
+ }
+ h = th
+ if i < len(name) {
+ start = i + 1
+ }
+ }
+ }
+ }
+
+ // The hash h is the hash for name. Load its object.
+ typ, data := t.s.object(h)
+ info := fileInfo{name, name[start:], 0, 0}
+ if typ == objBlob {
+ // Regular file.
+ info.mode = 0444
+ info.size = int64(len(data))
+ return &blobFile{info, bytes.NewReader(data)}, nil
+ }
+ if typ == objTree {
+ // Directory.
+ info.mode = fs.ModeDir | 0555
+ return &dirFile{t.s, info, data, 0}, nil
+ }
+ return nil, &fs.PathError{Path: name, Op: "open", Err: fmt.Errorf("unexpected git object type %s", typ)}
+}
+
+// fileInfo implements fs.FileInfo.
+type fileInfo struct {
+ path string
+ name string
+ mode fs.FileMode
+ size int64
+}
+
+func (i *fileInfo) Name() string { return i.name }
+
+func (i *fileInfo) Type() fs.FileMode { return i.mode & fs.ModeType }
+
+func (i *fileInfo) Mode() fs.FileMode { return i.mode }
+
+func (i *fileInfo) Sys() interface{} { return nil }
+
+func (i *fileInfo) IsDir() bool { return i.mode&fs.ModeDir != 0 }
+
+func (i *fileInfo) Size() int64 { return i.size }
+
+func (i *fileInfo) Info() (fs.FileInfo, error) { return i, nil }
+
+func (i *fileInfo) ModTime() time.Time { return time.Time{} }
+
+func (i *fileInfo) err(op string, err error) error {
+ return &fs.PathError{Path: i.path, Op: op, Err: err}
+}
+
+// A blobFile implements fs.File for a regular file.
+// The embedded bytes.Reader provides Read, Seek and other I/O methods.
+type blobFile struct {
+ info fileInfo
+ *bytes.Reader
+}
+
+func (f *blobFile) Close() error { return nil }
+
+func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil }
+
+// A dirFile implements fs.File for a directory.
+type dirFile struct {
+ s *store
+ info fileInfo
+ data []byte
+ off int
+}
+
+func (f *dirFile) Close() error { return nil }
+
+func (f *dirFile) Read([]byte) (int, error) { return 0, f.info.err("read", fs.ErrInvalid) }
+
+func (f *dirFile) Stat() (fs.FileInfo, error) { return &f.info, nil }
+
+func (f *dirFile) Seek(offset int64, whence int) (int64, error) {
+ if offset == 0 && whence == 0 {
+ // Allow rewind to start of directory.
+ f.off = 0
+ return 0, nil
+ }
+ return 0, f.info.err("seek", fs.ErrInvalid)
+}
+
+func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) {
+ defer func() {
+ if e := recover(); e != nil {
+ list = nil
+ err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack())
+ }
+ }()
+
+ for (n <= 0 || len(list) < n) && f.off < len(f.data) {
+ e, size := parseDirEntry(f.data[f.off:])
+ if size == 0 {
+ break
+ }
+ f.off += size
+ typ, data := f.s.object(e.hash)
+ mode := fs.FileMode(0444)
+ if typ == objTree {
+ mode = fs.ModeDir | 0555
+ }
+ infoSize := int64(0)
+ if typ == objBlob {
+ infoSize = int64(len(data))
+ }
+ name := string(e.name)
+ list = append(list, &fileInfo{name, name, mode, infoSize})
+ }
+ if len(list) == 0 && n > 0 {
+ return list, io.EOF
+ }
+ return list, nil
+}
+
+// A Repo is a connection to a remote repository served over HTTP or HTTPS.
+type Repo struct {
+ url string // trailing slash removed
+ caps map[string]string
+}
+
+// NewRepo connects to a Git repository at the given http:// or https:// URL.
+func NewRepo(url string) (*Repo, error) {
+ r := &Repo{url: strings.TrimSuffix(url, "/")}
+ if err := r.handshake(); err != nil {
+ return nil, err
+ }
+ return r, nil
+}
+
+// handshake runs the initial Git opening handshake, learning the capabilities of the server.
+// See https://git-scm.com/docs/protocol-v2#_initial_client_request.
+func (r *Repo) handshake() error {
+ req, _ := http.NewRequest("GET", r.url+"/info/refs?service=git-upload-pack", nil)
+ req.Header.Set("Accept", "*/*")
+ req.Header.Set("Git-Protocol", "version=2")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return fmt.Errorf("handshake: %v", err)
+ }
+ defer resp.Body.Close()
+ data, err := io.ReadAll(resp.Body)
+ if resp.StatusCode != 200 {
+ return fmt.Errorf("handshake: %v\n%s", resp.Status, data)
+ }
+ if err != nil {
+ return fmt.Errorf("handshake: reading body: %v", err)
+ }
+ if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-advertisement" {
+ return fmt.Errorf("handshake: invalid response Content-Type: %v", ct)
+ }
+
+ pr := newPktLineReader(bytes.NewReader(data))
+ lines, err := pr.Lines()
+ if len(lines) == 1 && lines[0] == "# service=git-upload-pack" {
+ lines, err = pr.Lines()
+ }
+ if err != nil {
+ return fmt.Errorf("handshake: parsing response: %v", err)
+ }
+ caps := make(map[string]string)
+ for _, line := range lines {
+ verb, args, _ := strings.Cut(line, "=")
+ caps[verb] = args
+ }
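+	// A typical protocol v2 advertisement produces lines such as
+	// "version 2", "ls-refs=unborn", "fetch=shallow wait-for-done", and
+	// "object-format=sha1" (the exact set varies by server), so for
+	// example caps["fetch"] would hold "shallow wait-for-done".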
+ if _, ok := caps["version 2"]; !ok {
+ return fmt.Errorf("handshake: not version 2: %q", lines)
+ }
+ r.caps = caps
+ return nil
+}
+
+// Resolve looks up the given ref and returns the corresponding Hash.
+func (r *Repo) Resolve(ref string) (Hash, error) {
+ if h, err := parseHash(ref); err == nil {
+ return h, nil
+ }
+
+ fail := func(err error) (Hash, error) {
+ return Hash{}, fmt.Errorf("resolve %s: %v", ref, err)
+ }
+ refs, err := r.refs(ref)
+ if err != nil {
+ return fail(err)
+ }
+ for _, known := range refs {
+ if known.name == ref {
+ return known.hash, nil
+ }
+ }
+ return fail(fmt.Errorf("unknown ref"))
+}
+
+// A ref is a single Git reference, like refs/heads/main, refs/tags/v1.0.0, or HEAD.
+type ref struct {
+ name string // "refs/heads/main", "refs/tags/v1.0.0", "HEAD"
+ hash Hash // hexadecimal hash
+}
+
+// refs executes an ls-refs command on the remote server
+// to look up refs with the given prefixes.
+// See https://git-scm.com/docs/protocol-v2#_ls_refs.
+func (r *Repo) refs(prefixes ...string) ([]ref, error) {
+ if _, ok := r.caps["ls-refs"]; !ok {
+ return nil, fmt.Errorf("refs: server does not support ls-refs")
+ }
+
+ var buf bytes.Buffer
+ pw := newPktLineWriter(&buf)
+ pw.WriteString("command=ls-refs")
+ pw.Delim()
+ pw.WriteString("peel")
+ pw.WriteString("symrefs")
+ for _, prefix := range prefixes {
+ pw.WriteString("ref-prefix " + prefix)
+ }
+ pw.Close()
+ postbody := buf.Bytes()
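+	// For example, with the single prefix "HEAD", postbody is
+	// "0013command=ls-refs00010008peel000bsymrefs0013ref-prefix HEAD0000".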
+
+ req, _ := http.NewRequest("POST", r.url+"/git-upload-pack", &buf)
+ req.Header.Set("Content-Type", "application/x-git-upload-pack-request")
+ req.Header.Set("Accept", "application/x-git-upload-pack-result")
+ req.Header.Set("Git-Protocol", "version=2")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("refs: %v", err)
+ }
+ defer resp.Body.Close()
+ data, err := io.ReadAll(resp.Body)
+ if resp.StatusCode != 200 {
+ return nil, fmt.Errorf("refs: %v\n%s", resp.Status, data)
+ }
+ if err != nil {
+ return nil, fmt.Errorf("refs: reading body: %v", err)
+ }
+ if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" {
+ return nil, fmt.Errorf("refs: invalid response Content-Type: %v", ct)
+ }
+
+ var refs []ref
+ lines, err := newPktLineReader(bytes.NewReader(data)).Lines()
+ if err != nil {
+ return nil, fmt.Errorf("refs: parsing response: %v %d\n%s\n%s", err, len(data), hex.Dump(postbody), hex.Dump(data))
+ }
+ for _, line := range lines {
+ hash, rest, ok := strings.Cut(line, " ")
+ if !ok {
+ return nil, fmt.Errorf("refs: parsing response: invalid line: %q", line)
+ }
+ h, err := parseHash(hash)
+ if err != nil {
+ return nil, fmt.Errorf("refs: parsing response: invalid line: %q", line)
+ }
+ name, _, _ := strings.Cut(rest, " ")
+ refs = append(refs, ref{hash: h, name: name})
+ }
+ return refs, nil
+}
+
+// Clone resolves the given ref to a hash and returns the corresponding fs.FS.
+func (r *Repo) Clone(ref string) (Hash, fs.FS, error) {
+ fail := func(err error) (Hash, fs.FS, error) {
+ return Hash{}, nil, fmt.Errorf("clone %s: %v", ref, err)
+ }
+ h, err := r.Resolve(ref)
+ if err != nil {
+ return fail(err)
+ }
+ tfs, err := r.fetch(h)
+ if err != nil {
+ return fail(err)
+ }
+ return h, tfs, nil
+}
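+
+// exampleClone is an illustrative sketch, not part of the bundled
+// x/website code: it clones HEAD of a repo (the URL here is just an
+// illustration) and reads one file from the returned fs.FS.
+func exampleClone() {
+	r, err := NewRepo("https://go.googlesource.com/website")
+	if err != nil {
+		panic(err)
+	}
+	h, fsys, err := r.Clone("HEAD")
+	if err != nil {
+		panic(err)
+	}
+	data, err := fs.ReadFile(fsys, "go.mod")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Printf("go.mod at commit %v:\n%s", h, data)
+}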
+
+// CloneHash returns the fs.FS for the given hash.
+func (r *Repo) CloneHash(h Hash) (fs.FS, error) {
+ tfs, err := r.fetch(h)
+ if err != nil {
+ return nil, fmt.Errorf("clone %s: %v", h, err)
+ }
+ return tfs, nil
+}
+
+// fetch returns the fs.FS for a given hash.
+func (r *Repo) fetch(h Hash) (fs.FS, error) {
+ // Fetch a shallow packfile from the remote server.
+ // Shallow means it only contains the tree at that one commit,
+ // not the entire history of the repo.
+ // See https://git-scm.com/docs/protocol-v2#_fetch.
+ opts, ok := r.caps["fetch"]
+ if !ok {
+ return nil, fmt.Errorf("fetch: server does not support fetch")
+ }
+ if !strings.Contains(" "+opts+" ", " shallow ") {
+ return nil, fmt.Errorf("fetch: server does not support shallow fetch")
+ }
+
+ // Prepare and send request for pack file.
+ var buf bytes.Buffer
+ pw := newPktLineWriter(&buf)
+ pw.WriteString("command=fetch")
+ pw.Delim()
+ pw.WriteString("deepen 1")
+ pw.WriteString("want " + h.String())
+ pw.WriteString("done")
+ pw.Close()
+ postbody := buf.Bytes()
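+	// For example, for a commit hash h the framed request is
+	// "0011command=fetch0001000cdeepen 10031want <40-hex-h>0008done0000".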
+
+ req, _ := http.NewRequest("POST", r.url+"/git-upload-pack", &buf)
+ req.Header.Set("Content-Type", "application/x-git-upload-pack-request")
+ req.Header.Set("Accept", "application/x-git-upload-pack-result")
+ req.Header.Set("Git-Protocol", "version=2")
+
+ resp, err := http.DefaultClient.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("fetch: %v", err)
+ }
+ defer resp.Body.Close()
+ if resp.StatusCode != 200 {
+ data, _ := io.ReadAll(resp.Body)
+ return nil, fmt.Errorf("fetch: %v\n%s\n%s", resp.Status, data, hex.Dump(postbody))
+ }
+ if ct := resp.Header.Get("Content-Type"); ct != "application/x-git-upload-pack-result" {
+ return nil, fmt.Errorf("fetch: invalid response Content-Type: %v", ct)
+ }
+
+ // Response is sequence of pkt-line packets.
+ // It is plain text output (printed by git) until we find "packfile".
+ // Then it switches to packets with a single prefix byte saying
+ // what kind of data is in that packet:
+ // 1 for pack file data, 2 for text output, 3 for errors.
+ var data []byte
+ pr := newPktLineReader(resp.Body)
+ sawPackfile := false
+ for {
+ line, err := pr.Next()
+ if err != nil {
+ if err == io.EOF {
+ break
+ }
+ return nil, fmt.Errorf("fetch: parsing response: %v", err)
+ }
+ if line == nil { // ignore delimiter
+ continue
+ }
+ if !sawPackfile {
+ // Discard response lines until we get to packfile start.
+ if strings.TrimSuffix(string(line), "\n") == "packfile" {
+ sawPackfile = true
+ }
+ continue
+ }
+		if len(line) == 0 || line[0] == 0 || line[0] > 3 {
+			return nil, fmt.Errorf("fetch: malformed response: invalid sideband: %q", line)
+		}
+ switch line[0] {
+ case 1:
+ data = append(data, line[1:]...)
+ case 2:
+ fmt.Printf("%s\n", line[1:])
+ case 3:
+ return nil, fmt.Errorf("fetch: server error: %s", line[1:])
+ }
+ }
+
+ if !bytes.HasPrefix(data, []byte("PACK")) {
+ return nil, fmt.Errorf("fetch: malformed response: not packfile")
+ }
+
+ // Unpack pack file and return fs.FS for the commit we downloaded.
+ var s store
+ if err := unpack(&s, data); err != nil {
+ return nil, fmt.Errorf("fetch: %v", err)
+ }
+ tfs, err := s.commit(h)
+ if err != nil {
+ return nil, fmt.Errorf("fetch: %v", err)
+ }
+ return tfs, nil
+}
+
+// unpack parses data, which is a Git pack-formatted archive,
+// writing every object it contains to the store s.
+//
+// See https://git-scm.com/docs/pack-format for format documentation.
+func unpack(s *store, data []byte) error {
+ // If the store is empty, pre-allocate the length of data.
+ // This should be about the right order of magnitude for the eventual data,
+ // avoiding many growing steps during append.
+ if len(s.data) == 0 {
+ s.data = make([]byte, 0, len(data))
+ }
+
+ // Pack data starts with 12-byte header: "PACK" version[4] nobj[4].
+ if len(data) < 12+20 {
+ return fmt.Errorf("malformed git pack: too short")
+ }
+ hdr := data[:12]
+ vers := binary.BigEndian.Uint32(hdr[4:8])
+ nobj := binary.BigEndian.Uint32(hdr[8:12])
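+	// For example, a pack holding 3 objects begins
+	// "PACK\x00\x00\x00\x02\x00\x00\x00\x03": magic, version 2, object count 3.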
+ if string(hdr[:4]) != "PACK" || vers != 2 && vers != 3 || len(data) < 12+20 || int64(nobj) >= int64(len(data)) {
+ return fmt.Errorf("malformed git pack")
+ }
+ if vers == 3 {
+ return fmt.Errorf("cannot read git pack v3")
+ }
+
+ // Pack data ends with SHA1 of the entire pack.
+ sum := sha1.Sum(data[:len(data)-20])
+ if !bytes.Equal(sum[:], data[len(data)-20:]) {
+ return fmt.Errorf("malformed git pack: bad checksum")
+ }
+
+ // Object data is everything between hdr and ending SHA1.
+ // Unpack every object into the store.
+ objs := data[12 : len(data)-20]
+ off := 0
+ for i := 0; i < int(nobj); i++ {
+ _, _, _, encSize, err := unpackObject(s, objs, off)
+ if err != nil {
+ return fmt.Errorf("unpack: malformed git pack: %v", err)
+ }
+ off += encSize
+ }
+ if off != len(objs) {
+ return fmt.Errorf("malformed git pack: junk after objects")
+ }
+ return nil
+}
+
+// unpackObject unpacks the object at objs[off:] and writes it to the store s.
+// It returns the type, hash, and content of the object, as well as the encoded size,
+// meaning the number of bytes at the start of objs[off:] that this record occupies.
+func unpackObject(s *store, objs []byte, off int) (typ objType, h Hash, content []byte, encSize int, err error) {
+ fail := func(err error) (objType, Hash, []byte, int, error) {
+ return 0, Hash{}, nil, 0, err
+ }
+ if off < 0 || off >= len(objs) {
+ return fail(fmt.Errorf("invalid object offset"))
+ }
+
+ // Object starts with varint-encoded type and length n.
+ // (The length n is the length of the compressed data that follows,
+ // not the length of the actual object.)
+ u, size := binary.Uvarint(objs[off:])
+ if size <= 0 {
+ return fail(fmt.Errorf("invalid object: bad varint header"))
+ }
+ typ = objType((u >> 4) & 7)
+ n := int(u&15 | u>>7<<4)
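+	// For example, a lone header byte 0x3b has continuation bit 0,
+	// type (0x3b>>4)&7 = 3 (objBlob), and size 0xb = 11.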
+
+ // Git often stores objects that differ very little (different revs of a file).
+ // It can save space by encoding one as "start with this other object and apply these diffs".
+ // There are two ways to specify "this other object": an object ref (20-byte SHA1)
+ // or as a relative offset to an earlier position in the objs slice.
+ // For either of these, we need to fetch the other object's type and data (deltaTyp and deltaBase).
+ // The Git docs call this the "deltified representation".
+ var deltaTyp objType
+ var deltaBase []byte
+ switch typ {
+ case objRefDelta:
+ if len(objs)-(off+size) < 20 {
+ return fail(fmt.Errorf("invalid object: bad delta ref"))
+ }
+ // Base block identified by SHA1 of an already unpacked hash.
+ var h Hash
+ copy(h[:], objs[off+size:])
+ size += 20
+ deltaTyp, deltaBase = s.object(h)
+ if deltaTyp == 0 {
+ return fail(fmt.Errorf("invalid object: unknown delta ref %v", h))
+ }
+
+ case objOfsDelta:
+ i := off + size
+ if len(objs)-i < 20 {
+ return fail(fmt.Errorf("invalid object: too short"))
+ }
+ // Base block identified by relative offset to earlier position in objs,
+ // using a varint-like but not-quite-varint encoding.
+ // Look for "offset encoding:" in https://git-scm.com/docs/pack-format.
+ d := int64(objs[i] & 0x7f)
+ for objs[i]&0x80 != 0 {
+ i++
+ if i-(off+size) > 10 {
+ return fail(fmt.Errorf("invalid object: malformed delta offset"))
+ }
+ d = d<<7 | int64(objs[i]&0x7f)
+ d += 1 << 7
+ }
+ i++
+ size = i - off
+
+ // Re-unpack the object at the earlier offset to find its type and content.
+ if d == 0 || d > int64(off) {
+ return fail(fmt.Errorf("invalid object: bad delta offset"))
+ }
+ var err error
+ deltaTyp, _, deltaBase, _, err = unpackObject(s, objs, off-int(d))
+ if err != nil {
+ return fail(fmt.Errorf("invalid object: bad delta offset"))
+ }
+ }
+
+ // The main encoded data is a zlib-compressed stream.
+ br := bytes.NewReader(objs[off+size:])
+ zr, err := zlib.NewReader(br)
+ if err != nil {
+ return fail(fmt.Errorf("invalid object deflate: %v", err))
+ }
+ data, err := io.ReadAll(zr)
+ if err != nil {
+ return fail(fmt.Errorf("invalid object: bad deflate: %v", err))
+ }
+ if len(data) != n {
+ return fail(fmt.Errorf("invalid object: deflate size %d != %d", len(data), n))
+ }
+ encSize = len(objs[off:]) - br.Len()
+
+ // If we fetched a base object above, the stream is an encoded delta.
+ // Otherwise it is the raw data.
+ switch typ {
+ default:
+ return fail(fmt.Errorf("invalid object: unknown object type"))
+ case objCommit, objTree, objBlob, objTag:
+ // ok
+ case objRefDelta, objOfsDelta:
+ // Actual object type is the type of the base object.
+ typ = deltaTyp
+
+ // Delta encoding starts with size of base object and size of new object.
+ baseSize, s := binary.Uvarint(data)
+ data = data[s:]
+ if baseSize != uint64(len(deltaBase)) {
+ return fail(fmt.Errorf("invalid object: mismatched delta src size"))
+ }
+ targSize, s := binary.Uvarint(data)
+ data = data[s:]
+
+ // Apply delta to base object, producing new object.
+ targ := make([]byte, targSize)
+ if err := applyDelta(targ, deltaBase, data); err != nil {
+ return fail(fmt.Errorf("invalid object: %v", err))
+ }
+ data = targ
+ }
+
+ h, data = s.add(typ, data)
+ return typ, h, data, encSize, nil
+}
+
+// applyDelta applies the delta encoding to src, producing dst,
+// which has already been allocated to the expected final size.
+// See https://git-scm.com/docs/pack-format#_deltified_representation for docs.
+func applyDelta(dst, src, delta []byte) error {
+ for len(delta) > 0 {
+ // Command byte says what comes next.
+ cmd := delta[0]
+ delta = delta[1:]
+ switch {
+ case cmd == 0:
+ // cmd == 0 is reserved.
+ return fmt.Errorf("invalid delta cmd")
+
+ case cmd&0x80 != 0:
+ // Copy from base object, 4-byte offset, 3-byte size.
+ // But any zero byte in the offset or size can be omitted.
+ // The bottom 7 bits of cmd say which offset/size bytes are present.
+ var off, size int64
+ for i := uint(0); i < 4; i++ {
+ if cmd&(1<<i) != 0 {
+ off |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ for i := uint(0); i < 3; i++ {
+ if cmd&(0x10<<i) != 0 {
+ size |= int64(delta[0]) << (8 * i)
+ delta = delta[1:]
+ }
+ }
+ // Size 0 means size 0x10000 for some reason. (!)
+ if size == 0 {
+ size = 0x10000
+ }
+ copy(dst[:size], src[off:off+size])
+ dst = dst[size:]
+
+ default:
+ // Up to 0x7F bytes of literal data, length in bottom 7 bits of cmd.
+ n := int(cmd)
+ copy(dst[:n], delta[:n])
+ dst = dst[n:]
+ delta = delta[n:]
+ }
+ }
+ if len(dst) != 0 {
+ return fmt.Errorf("delta encoding too short")
+ }
+ return nil
+}
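+
+// exampleApplyDelta is an illustrative sketch, not part of the bundled
+// x/website code: it applies a hand-built delta to a base object.
+// Command 0x90 0x06 copies 6 bytes from base offset 0 (offset bytes
+// omitted because they are zero), then 0x02 'G' 'o' inserts the 2-byte
+// literal "Go".
+func exampleApplyDelta() {
+	src := []byte("hello world")
+	dst := make([]byte, 8)
+	delta := []byte{0x90, 0x06, 0x02, 'G', 'o'}
+	if err := applyDelta(dst, src, delta); err != nil {
+		panic(err)
+	}
+	fmt.Println(string(dst))
+	// Output: hello Go
+}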
+
+// A pktLineReader reads Git pkt-line-formatted packets.
+//
+// Each n-byte packet is preceded by a 4-digit hexadecimal length
+// encoding n+4 (the length counts its own bytes), like "0006a\n" for "a\n".
+//
+// A packet starting with 0000 is a so-called flush packet.
+// A packet starting with 0001 is a delimiting marker,
+// which usually marks the end of a sequence in the stream.
+//
+// See https://git-scm.com/docs/protocol-common#_pkt_line_format
+// for the official documentation, although it fails to mention the 0001 packets.
+type pktLineReader struct {
+ b *bufio.Reader
+ size [4]byte
+}
+
+// newPktLineReader returns a new pktLineReader reading from r.
+func newPktLineReader(r io.Reader) *pktLineReader {
+ return &pktLineReader{b: bufio.NewReader(r)}
+}
+
+// Next returns the payload of the next packet from the stream.
+// If the next packet is a flush packet (length 0000), Next returns nil, io.EOF.
+// If the next packet is a delimiter packet (length 0001), Next returns nil, nil.
+// If the data stream has ended, Next returns nil, io.ErrUnexpectedEOF.
+func (r *pktLineReader) Next() ([]byte, error) {
+ _, err := io.ReadFull(r.b, r.size[:])
+ if err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ return nil, err
+ }
+ n, err := strconv.ParseUint(string(r.size[:]), 16, 0)
+ if err != nil || n == 2 || n == 3 {
+ return nil, fmt.Errorf("malformed pkt-line")
+ }
+ if n == 1 {
+ return nil, nil // delimiter
+ }
+ if n == 0 {
+ return nil, io.EOF
+ }
+ buf := make([]byte, n-4)
+ _, err = io.ReadFull(r.b, buf)
+ if err != nil {
+ if err == io.EOF {
+ err = io.ErrUnexpectedEOF
+ }
+ return nil, err
+ }
+ return buf, nil
+}
+
+// Lines reads packets from r until a flush packet.
+// It returns a string for each packet, with any trailing newline trimmed.
+func (r *pktLineReader) Lines() ([]string, error) {
+ var lines []string
+ for {
+ line, err := r.Next()
+ if err != nil {
+ if err == io.EOF {
+ err = nil
+ }
+ return lines, err
+ }
+ lines = append(lines, strings.TrimSuffix(string(line), "\n"))
+ }
+}
+
+// A pktLineWriter writes Git pkt-line-formatted packets.
+// See pktLineReader for a description of the packet format.
+type pktLineWriter struct {
+ b *bufio.Writer
+ size [4]byte
+}
+
+// newPktLineWriter returns a new pktLineWriter writing to w.
+func newPktLineWriter(w io.Writer) *pktLineWriter {
+ return &pktLineWriter{b: bufio.NewWriter(w)}
+}
+
+// writeSize writes a four-digit hexadecimal length packet for n.
+// Typically n is len(data)+4.
+func (w *pktLineWriter) writeSize(n int) {
+ hex := "0123456789abcdef"
+ w.size[0] = hex[n>>12]
+ w.size[1] = hex[(n>>8)&0xf]
+ w.size[2] = hex[(n>>4)&0xf]
+ w.size[3] = hex[(n>>0)&0xf]
+ w.b.Write(w.size[:])
+}
+
+// Write writes b as a single packet.
+func (w *pktLineWriter) Write(b []byte) (int, error) {
+ n := len(b)
+ if n+4 > 0xffff {
+ return 0, fmt.Errorf("write too large")
+ }
+ w.writeSize(n + 4)
+ w.b.Write(b)
+ return n, nil
+}
+
+// WriteString writes s as a single packet.
+func (w *pktLineWriter) WriteString(s string) (int, error) {
+ n := len(s)
+ if n+4 > 0xffff {
+ return 0, fmt.Errorf("write too large")
+ }
+ w.writeSize(n + 4)
+ w.b.WriteString(s)
+ return n, nil
+}
+
+// Close writes a terminating flush packet
+// and flushes buffered data to the underlying writer.
+func (w *pktLineWriter) Close() error {
+ w.b.WriteString("0000")
+ w.b.Flush()
+ return nil
+}
+
+// Delim writes a delimiter packet.
+func (w *pktLineWriter) Delim() {
+ w.b.WriteString("0001")
+}
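+
+// examplePktLine is an illustrative sketch, not part of the bundled
+// x/website code: it round-trips one packet through the pkt-line
+// framing, showing the 4-hex-digit length prefix that counts itself.
+func examplePktLine() {
+	var buf bytes.Buffer
+	pw := newPktLineWriter(&buf)
+	pw.WriteString("command=ls-refs")
+	pw.Close()
+	fmt.Printf("%q\n", buf.String())
+	// Output: "0013command=ls-refs0000"
+	lines, _ := newPktLineReader(&buf).Lines()
+	fmt.Println(lines)
+	// Output: [command=ls-refs]
+}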