| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package gitfs |
| |
| import ( |
| "bytes" |
| "crypto/sha1" |
| "encoding/hex" |
| "fmt" |
| hashpkg "hash" |
| "io" |
| "io/fs" |
| "runtime/debug" |
| "time" |
| ) |
| |
// A Hash is the 20-byte SHA-1 digest that identifies a Git object.
type Hash [20]byte

// String returns h written as 40 lowercase hexadecimal digits.
func (h Hash) String() string {
	return hex.EncodeToString(h[:])
}

// parseHash converts the full-length textual form of a Git hash
// (exactly 40 hexadecimal digits, in either case) into a Hash.
// Text of any other length, or text containing a non-hexadecimal
// character, is rejected with an "invalid hash" error and a zero Hash.
func parseHash(text string) (Hash, error) {
	var h Hash
	// Two hex digits encode one byte, so anything that is not
	// exactly twice the hash size cannot be a full-length hash.
	if len(text) != 2*len(h) {
		return Hash{}, fmt.Errorf("invalid hash")
	}
	if _, err := hex.Decode(h[:], []byte(text)); err != nil {
		// h may be partially filled here; do not leak it.
		return Hash{}, fmt.Errorf("invalid hash")
	}
	return h, nil
}
| |
// An objType says what kind of Git object something is.
// The numeric values match the type codes of the Git pack encoding
// (https://git-scm.com/docs/pack-format#_object_types).
type objType int

const (
	objNone     objType = iota // 0
	objCommit                  // 1
	objTree                    // 2
	objBlob                    // 3
	objTag                     // 4
	_                          // 5 undefined
	objOfsDelta                // 6
	objRefDelta                // 7
)

// objTypes maps the object types that have a textual name to that name.
// Types without a name (objNone here) hold the empty string.
var objTypes = [...]string{
	objCommit: "commit",
	objTree:   "tree",
	objBlob:   "blob",
	objTag:    "tag",
}

// String returns the textual name of t ("commit", "tree", "blob" or "tag").
// Any type that has no entry in objTypes is rendered as "objType(N)".
func (t objType) String() string {
	if 0 <= t && int(t) < len(objTypes) {
		if name := objTypes[t]; name != "" {
			return name
		}
	}
	return fmt.Sprintf("objType(%d)", int(t))
}
| |
| // A dirEntry is a Git directory entry parsed from a tree object. |
| type dirEntry struct { |
| mode int |
| name []byte |
| hash Hash |
| } |
| |
| // parseDirEntry parses the next directory entry from data, |
| // returning the entry and the number of bytes it occupied. |
| // If data is malformed, parseDirEntry returns dirEntry{}, 0. |
| func parseDirEntry(data []byte) (dirEntry, int) { |
| // Unclear where or if this format is documented by Git. |
| // Each directory entry is an octal mode, then a space, |
| // then a file name, then a NUL byte, then a 20-byte binary hash. |
| // Note that 'git cat-file -p <treehash>' shows a textual representation |
| // of this data, not the actual binary data. To see the binary data, |
| // use 'echo <treehash> | git cat-file --batch | hexdump -C'. |
| mode := 0 |
| i := 0 |
| for i < len(data) && data[i] != ' ' { |
| c := data[i] |
| if c < '0' || '7' < c { |
| return dirEntry{}, 0 |
| } |
| mode = mode*8 + int(c) - '0' |
| i++ |
| } |
| i++ |
| j := i |
| for j < len(data) && data[j] != 0 { |
| j++ |
| } |
| if len(data)-j < 1+20 { |
| return dirEntry{}, 0 |
| } |
| name := data[i:j] |
| var h Hash |
| copy(h[:], data[j+1:]) |
| return dirEntry{mode, name, h}, j + 1 + 20 |
| } |
| |
| // treeLookup looks in the tree object data for the directory entry with the given name, |
| // returning the mode and hash associated with the name. |
| func treeLookup(data []byte, name string) (mode int, h Hash, ok bool) { |
| // Note: The tree object directory entries are sorted by name, |
| // but the directory entry data is not self-synchronizing, |
| // so it's not possible to be clever and use a binary search here. |
| for len(data) > 0 { |
| e, size := parseDirEntry(data) |
| if size == 0 { |
| break |
| } |
| if string(e.name) == name { |
| return e.mode, e.hash, true |
| } |
| data = data[size:] |
| } |
| return 0, Hash{}, false |
| } |
| |
// commitKeyValue scans the header of the commit object data for the first
// line of the form "key value" whose key is exactly the given key.
// It returns the value (the remainder of that line, without the newline)
// and true, or nil and false if the header contains no such line.
//
// The header ends at the first empty line; the commit message that follows
// is never searched. The returned slice aliases data.
// (Try 'git cat-file -p <commithash>' to see the commit data format.)
func commitKeyValue(data []byte, key string) ([]byte, bool) {
	// Consume data one line at a time. Each test below is made against the
	// current line, so keys on any header line can match, not only the first.
	for len(data) > 0 && data[0] != '\n' {
		line, rest := data, []byte(nil)
		if nl := bytes.IndexByte(data, '\n'); nl >= 0 {
			line, rest = data[:nl], data[nl+1:]
		}
		// The key must be followed by a single space separating it from
		// the value, so that "tre" does not match a "tree ..." line.
		if len(line) > len(key) && line[len(key)] == ' ' && string(line[:len(key)]) == key {
			return line[len(key)+1:], true
		}
		data = rest
	}
	return nil, false
}
| |
| // A store is a collection of Git objects, indexed for lookup by hash. |
| type store struct { |
| sha1 hashpkg.Hash // reused hash state |
| index map[Hash]stored // lookup index |
| data []byte // concatenation of all object data |
| } |
| |
| // A stored describes a single stored object. |
| type stored struct { |
| typ objType // object type |
| off int // object data is store.data[off:off+len] |
| len int |
| } |
| |
| // add adds an object with the given type and content to s, returning its Hash. |
| // If the object is already stored in s, add succeeds but doesn't store a second copy. |
| func (s *store) add(typ objType, data []byte) (Hash, []byte) { |
| if s.sha1 == nil { |
| s.sha1 = sha1.New() |
| } |
| |
| // Compute Git hash for object. |
| s.sha1.Reset() |
| fmt.Fprintf(s.sha1, "%s %d\x00", typ, len(data)) |
| s.sha1.Write(data) |
| var h Hash |
| s.sha1.Sum(h[:0]) // appends into h |
| |
| e, ok := s.index[h] |
| if !ok { |
| if s.index == nil { |
| s.index = make(map[Hash]stored) |
| } |
| e = stored{typ, len(s.data), len(data)} |
| s.index[h] = e |
| s.data = append(s.data, data...) |
| } |
| return h, s.data[e.off : e.off+e.len] |
| } |
| |
| // object returns the type and data for the object with hash h. |
| // If there is no object with hash h, object returns 0, nil. |
| func (s *store) object(h Hash) (typ objType, data []byte) { |
| d, ok := s.index[h] |
| if !ok { |
| return 0, nil |
| } |
| return d.typ, s.data[d.off : d.off+d.len] |
| } |
| |
| // commit returns a treeFS for the file system tree associated with the given commit hash. |
| func (s *store) commit(h Hash) (*treeFS, error) { |
| // The commit object data starts with key-value pairs |
| typ, data := s.object(h) |
| if typ == objNone { |
| return nil, fmt.Errorf("commit %s: no such hash", h) |
| } |
| if typ != objCommit { |
| return nil, fmt.Errorf("commit %s: unexpected type %s", h, typ) |
| } |
| treeHash, ok := commitKeyValue(data, "tree") |
| if !ok { |
| return nil, fmt.Errorf("commit %s: no tree", h) |
| } |
| h, err := parseHash(string(treeHash)) |
| if err != nil { |
| return nil, fmt.Errorf("commit %s: invalid tree %q", h, treeHash) |
| } |
| return &treeFS{s, h}, nil |
| } |
| |
| // A treeFS is an fs.FS serving a Git file system tree rooted at a given tree object hash. |
| type treeFS struct { |
| s *store |
| tree Hash // root tree |
| } |
| |
| // Open opens the given file or directory, implementing the fs.FS Open method. |
| func (t *treeFS) Open(name string) (f fs.File, err error) { |
| defer func() { |
| if e := recover(); e != nil { |
| f = nil |
| err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack()) |
| } |
| }() |
| |
| // Process each element in the slash-separated path, producing hash identified by name. |
| h := t.tree |
| start := 0 // index of start of final path element in name |
| if name != "." { |
| for i := 0; i <= len(name); i++ { |
| if i == len(name) || name[i] == '/' { |
| // Look up name in current tree object h. |
| typ, data := t.s.object(h) |
| if typ != objTree { |
| return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist} |
| } |
| _, th, ok := treeLookup(data, name[start:i]) |
| if !ok { |
| return nil, &fs.PathError{Path: name, Op: "open", Err: fs.ErrNotExist} |
| } |
| h = th |
| if i < len(name) { |
| start = i + 1 |
| } |
| } |
| } |
| } |
| |
| // The hash h is the hash for name. Load its object. |
| typ, data := t.s.object(h) |
| info := fileInfo{name, name[start:], 0, 0} |
| if typ == objBlob { |
| // Regular file. |
| info.mode = 0444 |
| info.size = int64(len(data)) |
| return &blobFile{info, bytes.NewReader(data)}, nil |
| } |
| if typ == objTree { |
| // Directory. |
| info.mode = fs.ModeDir | 0555 |
| return &dirFile{t.s, info, data, 0}, nil |
| } |
| return nil, &fs.PathError{Path: name, Op: "open", Err: fmt.Errorf("unexpected git object type %s", typ)} |
| } |
| |
// A fileInfo describes one file or directory.
// Its pointer type has the methods of both fs.FileInfo and fs.DirEntry.
type fileInfo struct {
	path string      // path reported in errors built by err
	name string      // name returned by Name
	mode fs.FileMode // file type and permission bits
	size int64       // length in bytes
}

// Name returns the name of the file.
func (i *fileInfo) Name() string {
	return i.name
}

// Size returns the recorded length in bytes.
func (i *fileInfo) Size() int64 {
	return i.size
}

// Mode returns the full mode: type bits and permission bits.
func (i *fileInfo) Mode() fs.FileMode {
	return i.mode
}

// Type returns only the type bits of the mode.
func (i *fileInfo) Type() fs.FileMode {
	return i.mode.Type()
}

// IsDir reports whether the mode describes a directory.
func (i *fileInfo) IsDir() bool {
	return i.mode.IsDir()
}

// ModTime always returns the zero time; no modification time is recorded.
func (i *fileInfo) ModTime() time.Time {
	var zero time.Time
	return zero
}

// Sys always returns nil; there is no underlying data source to expose.
func (i *fileInfo) Sys() interface{} {
	return nil
}

// Info returns i itself, which serves as its own fs.FileInfo.
func (i *fileInfo) Info() (fs.FileInfo, error) {
	return i, nil
}

// err wraps err in a *fs.PathError for the operation op on this file's path.
func (i *fileInfo) err(op string, err error) error {
	return &fs.PathError{Op: op, Path: i.path, Err: err}
}
| |
| // A blobFile implements fs.File for a regular file. |
| // The embedded bytes.Reader provides Read, Seek and other I/O methods. |
| type blobFile struct { |
| info fileInfo |
| *bytes.Reader |
| } |
| |
| func (f *blobFile) Close() error { return nil } |
| func (f *blobFile) Stat() (fs.FileInfo, error) { return &f.info, nil } |
| |
| // A dirFile implements fs.File for a directory. |
| type dirFile struct { |
| s *store |
| info fileInfo |
| data []byte |
| off int |
| } |
| |
| func (f *dirFile) Close() error { return nil } |
| func (f *dirFile) Read([]byte) (int, error) { return 0, f.info.err("read", fs.ErrInvalid) } |
| func (f *dirFile) Stat() (fs.FileInfo, error) { return &f.info, nil } |
| |
| func (f *dirFile) Seek(offset int64, whence int) (int64, error) { |
| if offset == 0 && whence == 0 { |
| // Allow rewind to start of directory. |
| f.off = 0 |
| return 0, nil |
| } |
| return 0, f.info.err("seek", fs.ErrInvalid) |
| } |
| |
| func (f *dirFile) ReadDir(n int) (list []fs.DirEntry, err error) { |
| defer func() { |
| if e := recover(); e != nil { |
| list = nil |
| err = fmt.Errorf("gitfs panic: %v\n%s", e, debug.Stack()) |
| } |
| }() |
| |
| for (n <= 0 || len(list) < n) && f.off < len(f.data) { |
| e, size := parseDirEntry(f.data[f.off:]) |
| if size == 0 { |
| break |
| } |
| f.off += size |
| typ, data := f.s.object(e.hash) |
| mode := fs.FileMode(0444) |
| if typ == objTree { |
| mode = fs.ModeDir | 0555 |
| } |
| infoSize := int64(0) |
| if typ == objBlob { |
| infoSize = int64(len(data)) |
| } |
| name := string(e.name) |
| list = append(list, &fileInfo{name, name, mode, infoSize}) |
| } |
| if len(list) == 0 && n > 0 { |
| return list, io.EOF |
| } |
| return list, nil |
| } |