// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package sumdb

import (
	"bytes"
	"errors"
	"fmt"
	"path"
	"strings"
	"sync"
	"sync/atomic"

	"golang.org/x/mod/module"
	"golang.org/x/mod/sumdb/note"
	"golang.org/x/mod/sumdb/tlog"
)

// A ClientOps provides the external operations
// (file caching, HTTP fetches, and so on) needed by the Client.
// The methods must be safe for concurrent use by multiple goroutines.
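// A sketch of one possible implementation appears after the interface definition.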
type ClientOps interface {
	// ReadRemote reads and returns the content served at the given path
	// on the remote database server. The path begins with "/lookup" or "/tile/",
	// and there is no need to parse the path in any way.
	// It is the implementation's responsibility to turn that path into a full URL
	// and make the HTTP request. ReadRemote should return an error for
	// any non-200 HTTP response status.
	ReadRemote(path string) ([]byte, error)

	// ReadConfig reads and returns the content of the named configuration file.
	// There is only a fixed set of configuration files.
	//
	// "key" returns a file containing the verifier key for the server.
	//
	// serverName + "/latest" returns a file containing the latest known
	// signed tree from the server.
	// To signal that the client wishes to start with an "empty" signed tree,
	// ReadConfig can return a successful empty result (0 bytes of data).
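	//
	// For example, when the client is configured with the public Go checksum
	// database (whose verifier name is "sum.golang.org"), the configuration
	// files are "key" and "sum.golang.org/latest".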
	ReadConfig(file string) ([]byte, error)

	// WriteConfig updates the content of the named configuration file,
	// changing it from the old []byte to the new []byte.
	// If the old []byte does not match the stored configuration,
	// WriteConfig must return ErrWriteConflict.
	// Otherwise, WriteConfig should atomically replace old with new.
	// The "key" configuration file is never written using WriteConfig.
	WriteConfig(file string, old, new []byte) error

	// ReadCache reads and returns the content of the named cache file.
	// Any returned error will be treated as equivalent to the file not existing.
	// There can be arbitrarily many cache files, such as:
	//	serverName/lookup/pkg@version
	//	serverName/tile/8/1/x123/456
	ReadCache(file string) ([]byte, error)

	// WriteCache writes the named cache file.
	WriteCache(file string, data []byte)

	// Log prints the given log message (such as with log.Print).
	Log(msg string)

	// SecurityError prints the given security error log message.
	// The Client returns ErrSecurity from any operation that invokes SecurityError,
	// but the return value is mainly for testing. In a real program,
	// SecurityError should typically print the message and call log.Fatal or os.Exit.
	SecurityError(msg string)
}
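
// The following sketch shows one way a caller might implement ClientOps,
// storing configuration and cache files under a local directory and
// fetching remote paths over HTTP. It is illustrative only and is not part
// of this package: the dirOps name, its fields, and the directory layout
// are assumptions, and a real implementation would want atomic file
// replacement in WriteConfig and more careful error handling.
//
//	type dirOps struct {
//		base string // base URL of the checksum database server
//		dir  string // local directory for config and cache files
//	}
//
//	func (d *dirOps) ReadRemote(path string) ([]byte, error) {
//		resp, err := http.Get(d.base + path)
//		if err != nil {
//			return nil, err
//		}
//		defer resp.Body.Close()
//		if resp.StatusCode != 200 {
//			return nil, fmt.Errorf("GET %s: %s", path, resp.Status)
//		}
//		return io.ReadAll(resp.Body)
//	}
//
//	func (d *dirOps) ReadConfig(file string) ([]byte, error) {
//		data, err := os.ReadFile(filepath.Join(d.dir, "config", file))
//		if errors.Is(err, fs.ErrNotExist) && strings.HasSuffix(file, "/latest") {
//			return []byte{}, nil // start with an empty signed tree
//		}
//		return data, err
//	}
//
//	func (d *dirOps) WriteConfig(file string, old, new []byte) error {
//		name := filepath.Join(d.dir, "config", file)
//		data, _ := os.ReadFile(name)
//		if !bytes.Equal(data, old) {
//			return sumdb.ErrWriteConflict
//		}
//		os.MkdirAll(filepath.Dir(name), 0777)
//		return os.WriteFile(name, new, 0666)
//	}
//
//	func (d *dirOps) ReadCache(file string) ([]byte, error) {
//		return os.ReadFile(filepath.Join(d.dir, "cache", file))
//	}
//
//	func (d *dirOps) WriteCache(file string, data []byte) {
//		name := filepath.Join(d.dir, "cache", file)
//		os.MkdirAll(filepath.Dir(name), 0777)
//		os.WriteFile(name, data, 0666)
//	}
//
//	func (d *dirOps) Log(msg string)           { log.Print(msg) }
//	func (d *dirOps) SecurityError(msg string) { log.Fatal(msg) }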

// ErrWriteConflict signals a write conflict during ClientOps.WriteConfig.
var ErrWriteConflict = errors.New("write conflict")

// ErrSecurity is returned by Client operations that invoke ClientOps.SecurityError.
var ErrSecurity = errors.New("security error: misbehaving server")

// A Client is a client connection to a checksum database.
// All the methods are safe for simultaneous use by multiple goroutines.
type Client struct {
	ops ClientOps // access to operations in the external world

	didLookup uint32

	// one-time initialized data
	initOnce   sync.Once
	initErr    error          // init error, if any
	name       string         // name of accepted verifier
	verifiers  note.Verifiers // accepted verifiers (just one, but Verifiers for note.Open)
	tileReader tileReader
	tileHeight int
	nosumdb    string

	record    parCache // cache of record lookup, keyed by path@vers
	tileCache parCache // cache of c.readTile, keyed by tile

	latestMu  sync.Mutex
	latest    tlog.Tree // latest known tree head
	latestMsg []byte    // encoded signed note for latest

	tileSavedMu sync.Mutex
	tileSaved   map[tlog.Tile]bool // which tiles have been saved using c.ops.WriteCache already
}

// NewClient returns a new Client using the given ClientOps.
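//
// A typical use, assuming an ops value that implements ClientOps
// (ops is a placeholder here, not something this package provides), is:
//
//	client := sumdb.NewClient(ops)
//	lines, err := client.Lookup("golang.org/x/text", "v0.3.0")
//	if err != nil {
//		// handle error
//	}
//	for _, line := range lines {
//		fmt.Println(line)
//	}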
func NewClient(ops ClientOps) *Client {
	return &Client{
		ops: ops,
	}
}

// init initializes the client (if not already initialized)
// and returns any initialization error.
func (c *Client) init() error {
	c.initOnce.Do(c.initWork)
	return c.initErr
}

// initWork does the actual initialization work.
func (c *Client) initWork() {
	defer func() {
		if c.initErr != nil {
			c.initErr = fmt.Errorf("initializing sumdb.Client: %v", c.initErr)
		}
	}()

	c.tileReader.c = c
	if c.tileHeight == 0 {
		c.tileHeight = 8
	}
	c.tileSaved = make(map[tlog.Tile]bool)

	vkey, err := c.ops.ReadConfig("key")
	if err != nil {
		c.initErr = err
		return
	}
	verifier, err := note.NewVerifier(strings.TrimSpace(string(vkey)))
	if err != nil {
		c.initErr = err
		return
	}
	c.verifiers = note.VerifierList(verifier)
	c.name = verifier.Name()

	data, err := c.ops.ReadConfig(c.name + "/latest")
	if err != nil {
		c.initErr = err
		return
	}
	if err := c.mergeLatest(data); err != nil {
		c.initErr = err
		return
	}
}

// SetTileHeight sets the tile height for the Client.
// If SetTileHeight is not called, the Client defaults to tile height 8.
// SetTileHeight can be called at most once,
// and if so it must be called before the first call to Lookup.
func (c *Client) SetTileHeight(height int) {
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetTileHeight used after Lookup")
	}
	if height <= 0 {
		panic("invalid call to SetTileHeight")
	}
	if c.tileHeight != 0 {
		panic("multiple calls to SetTileHeight")
	}
	c.tileHeight = height
}

// SetGONOSUMDB sets the list of comma-separated GONOSUMDB patterns for the Client.
// For any module path matching one of the patterns,
// Lookup will return ErrGONOSUMDB.
// SetGONOSUMDB can be called at most once,
// and if so it must be called before the first call to Lookup.
func (c *Client) SetGONOSUMDB(list string) {
	if atomic.LoadUint32(&c.didLookup) != 0 {
		panic("SetGONOSUMDB used after Lookup")
	}
	if c.nosumdb != "" {
		panic("multiple calls to SetGONOSUMDB")
	}
	c.nosumdb = list
}

// ErrGONOSUMDB is returned by Lookup for paths that match
// a pattern listed in the GONOSUMDB list (set by SetGONOSUMDB,
// usually from the environment variable).
var ErrGONOSUMDB = errors.New("skipped (listed in GONOSUMDB)")

func (c *Client) skip(target string) bool {
	return globsMatchPath(c.nosumdb, target)
}

// globsMatchPath reports whether any path prefix of target
// matches one of the glob patterns (as defined by path.Match)
// in the comma-separated globs list.
// It ignores any empty or malformed patterns in the list.
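//
// For example:
//
//	globsMatchPath("example.com/private", "example.com/private/repo") == true
//	globsMatchPath("*.corp.example.com,rsc.io/private", "rsc.io/private/quux") == true
//	globsMatchPath("example.com/private", "example.com/public") == false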
func globsMatchPath(globs, target string) bool {
	for globs != "" {
		// Extract next non-empty glob in comma-separated list.
		var glob string
		if i := strings.Index(globs, ","); i >= 0 {
			glob, globs = globs[:i], globs[i+1:]
		} else {
			glob, globs = globs, ""
		}
		if glob == "" {
			continue
		}

		// A glob with N+1 path elements (N slashes) needs to be matched
		// against the first N+1 path elements of target,
		// which end just before the N+1'th slash.
		n := strings.Count(glob, "/")
		prefix := target
		// Walk target, counting slashes, truncating at the N+1'th slash.
		for i := 0; i < len(target); i++ {
			if target[i] == '/' {
				if n == 0 {
					prefix = target[:i]
					break
				}
				n--
			}
		}
		if n > 0 {
			// Not enough prefix elements.
			continue
		}
		matched, _ := path.Match(glob, prefix)
		if matched {
			return true
		}
	}
	return false
}

// Lookup returns the go.sum lines for the given module path and version.
// The version may end in a /go.mod suffix, in which case Lookup returns
// the go.sum lines for the module's go.mod-only hash.
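//
// For example (hash values elided):
//
//	client.Lookup("golang.org/x/text", "v0.3.0")        // lines like "golang.org/x/text v0.3.0 h1:..."
//	client.Lookup("golang.org/x/text", "v0.3.0/go.mod") // lines like "golang.org/x/text v0.3.0/go.mod h1:..."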
func (c *Client) Lookup(path, vers string) (lines []string, err error) {
	atomic.StoreUint32(&c.didLookup, 1)

	if c.skip(path) {
		return nil, ErrGONOSUMDB
	}

	defer func() {
		if err != nil {
			err = fmt.Errorf("%s@%s: %v", path, vers, err)
		}
	}()

	if err := c.init(); err != nil {
		return nil, err
	}

	// Prepare encoded cache filename / URL.
	epath, err := module.EscapePath(path)
	if err != nil {
		return nil, err
	}
	evers, err := module.EscapeVersion(strings.TrimSuffix(vers, "/go.mod"))
	if err != nil {
		return nil, err
	}
	remotePath := "/lookup/" + epath + "@" + evers
	file := c.name + remotePath

	// Fetch the data.
	// The c.record cache avoids redundant ReadCache/ReadRemote operations
	// (especially since go.sum lines tend to come in pairs for a given
	// path and version) and also avoids having multiple of the same
	// request in flight at once.
	type cached struct {
		data []byte
		err  error
	}
	result := c.record.Do(file, func() interface{} {
		// Try the on-disk cache, or else get from web.
		writeCache := false
		data, err := c.ops.ReadCache(file)
		if err != nil {
			data, err = c.ops.ReadRemote(remotePath)
			if err != nil {
				return cached{nil, err}
			}
			writeCache = true
		}

		// Validate the record before using it for anything.
		id, text, treeMsg, err := tlog.ParseRecord(data)
		if err != nil {
			return cached{nil, err}
		}
		if err := c.mergeLatest(treeMsg); err != nil {
			return cached{nil, err}
		}
		if err := c.checkRecord(id, text); err != nil {
			return cached{nil, err}
		}

		// Now that we've validated the record,
		// save it to the on-disk cache (unless that's where it came from).
		if writeCache {
			c.ops.WriteCache(file, data)
		}

		return cached{data, nil}
	}).(cached)
	if result.err != nil {
		return nil, result.err
	}

	// Extract the lines for the specific version we want
	// (with or without /go.mod).
	prefix := path + " " + vers + " "
	var hashes []string
	for _, line := range strings.Split(string(result.data), "\n") {
		if strings.HasPrefix(line, prefix) {
			hashes = append(hashes, line)
		}
	}
	return hashes, nil
}

// mergeLatest merges the tree head in msg
// with the Client's current latest tree head,
// ensuring the result is a consistent timeline.
// If the result is inconsistent, mergeLatest calls c.ops.SecurityError
// with a detailed security error message and then
// (only if c.ops.SecurityError does not exit the program) returns ErrSecurity.
// If the Client's current latest tree head moves forward,
// mergeLatest updates the underlying configuration file as well,
// taking care to merge any independent updates to that configuration.
func (c *Client) mergeLatest(msg []byte) error {
	// Merge msg into our in-memory copy of the latest tree head.
	when, err := c.mergeLatestMem(msg)
	if err != nil {
		return err
	}
	if when != msgFuture {
		// msg matched our present or was in the past.
		// No change to our present, so no update of config file.
		return nil
	}

	// Flush our extended timeline back out to the configuration file.
	// If the configuration file has been updated in the interim,
	// we need to merge any updates made there as well.
	// Note that WriteConfig is an atomic compare-and-swap.
	for {
		msg, err := c.ops.ReadConfig(c.name + "/latest")
		if err != nil {
			return err
		}
		when, err := c.mergeLatestMem(msg)
		if err != nil {
			return err
		}
		if when != msgPast {
			// msg matched our present or was from the future,
			// and now our in-memory copy matches.
			return nil
		}

		// msg (== config) is in the past, so we need to update it.
		c.latestMu.Lock()
		latestMsg := c.latestMsg
		c.latestMu.Unlock()
		if err := c.ops.WriteConfig(c.name+"/latest", msg, latestMsg); err != ErrWriteConflict {
			// Success or a non-write-conflict error.
			return err
		}
	}
}

const (
	msgPast = 1 + iota
	msgNow
	msgFuture
)

// mergeLatestMem is like mergeLatest but is only concerned with
// updating the in-memory copy of the latest tree head (c.latest),
// not the configuration file.
// The when result explains when msg happened relative to our
// previous idea of c.latest:
// msgPast means msg was from before c.latest,
// msgNow means msg was exactly c.latest, and
// msgFuture means msg was from after c.latest, which has now been updated.
func (c *Client) mergeLatestMem(msg []byte) (when int, err error) {
	if len(msg) == 0 {
		// Accept empty msg as the unsigned, empty timeline.
		c.latestMu.Lock()
		latest := c.latest
		c.latestMu.Unlock()
		if latest.N == 0 {
			return msgNow, nil
		}
		return msgPast, nil
	}

	note, err := note.Open(msg, c.verifiers)
	if err != nil {
		return 0, fmt.Errorf("reading tree note: %v\nnote:\n%s", err, msg)
	}
	tree, err := tlog.ParseTree([]byte(note.Text))
	if err != nil {
		return 0, fmt.Errorf("reading tree: %v\ntree:\n%s", err, note.Text)
	}

	// Other lookups may be calling mergeLatest with other heads,
	// so c.latest is changing underfoot. We don't want to hold the
	// c.latestMu lock during tile fetches, so loop trying to update c.latest.
	c.latestMu.Lock()
	latest := c.latest
	latestMsg := c.latestMsg
	c.latestMu.Unlock()

	for {
		// If the tree head looks old, check that it is on our timeline.
		if tree.N <= latest.N {
			if err := c.checkTrees(tree, msg, latest, latestMsg); err != nil {
				return 0, err
			}
			if tree.N < latest.N {
				return msgPast, nil
			}
			return msgNow, nil
		}

		// The tree head looks new. Check that we are on its timeline
		// and try to move our timeline forward.
		if err := c.checkTrees(latest, latestMsg, tree, msg); err != nil {
			return 0, err
		}

		// Install our msg if possible.
		// Otherwise we will go around again.
		c.latestMu.Lock()
		installed := false
		if c.latest == latest {
			installed = true
			c.latest = tree
			c.latestMsg = msg
		} else {
			latest = c.latest
			latestMsg = c.latestMsg
		}
		c.latestMu.Unlock()

		if installed {
			return msgFuture, nil
		}
	}
}

// checkTrees checks that older (from olderNote) is contained in newer (from newerNote).
// If an error occurs, such as malformed data or a network problem, checkTrees returns that error.
// If on the other hand checkTrees finds evidence of misbehavior, it prepares a detailed
// message, calls c.ops.SecurityError, and returns ErrSecurity.
func (c *Client) checkTrees(older tlog.Tree, olderNote []byte, newer tlog.Tree, newerNote []byte) error {
	thr := tlog.TileHashReader(newer, &c.tileReader)
	h, err := tlog.TreeHash(older.N, thr)
	if err != nil {
		if older.N == newer.N {
			return fmt.Errorf("checking tree#%d: %v", older.N, err)
		}
		return fmt.Errorf("checking tree#%d against tree#%d: %v", older.N, newer.N, err)
	}
	if h == older.Hash {
		return nil
	}

	// Detected a fork in the tree timeline.
	// Start by reporting the inconsistent signed tree notes.
	var buf bytes.Buffer
	fmt.Fprintf(&buf, "SECURITY ERROR\n")
	fmt.Fprintf(&buf, "go.sum database server misbehavior detected!\n\n")
	indent := func(b []byte) []byte {
		return bytes.Replace(b, []byte("\n"), []byte("\n\t"), -1)
	}
	fmt.Fprintf(&buf, "old database:\n\t%s\n", indent(olderNote))
	fmt.Fprintf(&buf, "new database:\n\t%s\n", indent(newerNote))

	// The notes alone are not enough to prove the inconsistency.
	// We also need to show that the newer note's tree hash for older.N
	// does not match older.Hash. The consumer of this report could
	// of course consult the server to try to verify the inconsistency,
	// but we are holding all the bits we need to prove it right now,
	// so we might as well print them and make the report not depend
	// on the continued availability of the misbehaving server.
	// Preparing this data only reuses the tiled hashes needed for
	// tlog.TreeHash(older.N, thr) above, so assuming thr is caching tiles,
	// there are no new accesses to the server here, and these operations cannot fail.
	fmt.Fprintf(&buf, "proof of misbehavior:\n\t%v", h)
	if p, err := tlog.ProveTree(newer.N, older.N, thr); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: %v\n", err)
	} else if err := tlog.CheckTree(p, newer.N, newer.Hash, older.N, h); err != nil {
		fmt.Fprintf(&buf, "\tinternal error: generated inconsistent proof\n")
	} else {
		for _, h := range p {
			fmt.Fprintf(&buf, "\n\t%v", h)
		}
	}
	c.ops.SecurityError(buf.String())
	return ErrSecurity
}

// checkRecord checks that record #id's hash matches data.
func (c *Client) checkRecord(id int64, data []byte) error {
	c.latestMu.Lock()
	latest := c.latest
	c.latestMu.Unlock()

	if id >= latest.N {
		return fmt.Errorf("cannot validate record %d in tree of size %d", id, latest.N)
	}
	hashes, err := tlog.TileHashReader(latest, &c.tileReader).ReadHashes([]int64{tlog.StoredHashIndex(0, id)})
	if err != nil {
		return err
	}
	if hashes[0] == tlog.RecordHash(data) {
		return nil
	}
	return fmt.Errorf("cannot authenticate record data in server response")
}

// tileReader is a *Client wrapper that implements tlog.TileReader.
// The separate type avoids exposing the ReadTiles and SaveTiles
// methods on Client itself.
type tileReader struct {
	c *Client
}

func (r *tileReader) Height() int {
	return r.c.tileHeight
}

// ReadTiles reads and returns the requested tiles,
// either from the on-disk cache or the server.
func (r *tileReader) ReadTiles(tiles []tlog.Tile) ([][]byte, error) {
	// Read all the tiles in parallel.
	data := make([][]byte, len(tiles))
	errs := make([]error, len(tiles))
	var wg sync.WaitGroup
	for i, tile := range tiles {
		wg.Add(1)
		go func(i int, tile tlog.Tile) {
			defer wg.Done()
			data[i], errs[i] = r.c.readTile(tile)
		}(i, tile)
	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return nil, err
		}
	}

	return data, nil
}

// tileCacheKey returns the cache key for the tile.
func (c *Client) tileCacheKey(tile tlog.Tile) string {
	return c.name + "/" + tile.Path()
}

// tileRemotePath returns the remote path for the tile.
func (c *Client) tileRemotePath(tile tlog.Tile) string {
	return "/" + tile.Path()
}

// readTile reads a single tile, either from the on-disk cache or the server.
func (c *Client) readTile(tile tlog.Tile) ([]byte, error) {
	type cached struct {
		data []byte
		err  error
	}

	result := c.tileCache.Do(tile, func() interface{} {
		// Try the requested tile in on-disk cache.
		data, err := c.ops.ReadCache(c.tileCacheKey(tile))
		if err == nil {
			c.markTileSaved(tile)
			return cached{data, nil}
		}

		// Try the full tile in on-disk cache (if requested tile not already full).
		// We only save authenticated tiles to the on-disk cache,
		// so the recreated prefix is equally authenticated.
		full := tile
		full.W = 1 << uint(tile.H)
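		// A full tile's data is its hashes laid out back to back, so
		// len(data)/full.W below is the size of a single hash and the
		// first tile.W hashes of the full tile are exactly the data of
		// the requested partial tile.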
		if tile != full {
			data, err := c.ops.ReadCache(c.tileCacheKey(full))
			if err == nil {
				c.markTileSaved(tile) // don't save tile later; we already have full
				return cached{data[:len(data)/full.W*tile.W], nil}
			}
		}

		// Try requested tile from server.
		data, err = c.ops.ReadRemote(c.tileRemotePath(tile))
		if err == nil {
			return cached{data, nil}
		}

		// Try full tile on server.
		// If the partial tile does not exist, it should be because
		// the tile has been completed and only the complete one
		// is available.
		if tile != full {
			data, err := c.ops.ReadRemote(c.tileRemotePath(full))
			if err == nil {
				// Note: We could save the full tile in the on-disk cache here,
				// but we don't know if it is valid yet, and we will only find out
				// about the partial data, not the full data. So let SaveTiles
				// save the partial tile, and we'll just refetch the full tile later
				// once we can validate more (or all) of it.
				return cached{data[:len(data)/full.W*tile.W], nil}
			}
		}

		// Nothing worked.
		// Return the error from the server fetch for the requested (not full) tile.
		return cached{nil, err}
	}).(cached)

	return result.data, result.err
}

// markTileSaved records that tile is already present in the on-disk cache,
// so that a future SaveTiles for that tile can be ignored.
func (c *Client) markTileSaved(tile tlog.Tile) {
	c.tileSavedMu.Lock()
	c.tileSaved[tile] = true
	c.tileSavedMu.Unlock()
}

// SaveTiles saves the now validated tiles.
func (r *tileReader) SaveTiles(tiles []tlog.Tile, data [][]byte) {
	c := r.c

	// Determine which tiles need saving.
	// (Tiles that came from the cache need not be saved back.)
	save := make([]bool, len(tiles))
	c.tileSavedMu.Lock()
	for i, tile := range tiles {
		if !c.tileSaved[tile] {
			save[i] = true
			c.tileSaved[tile] = true
		}
	}
	c.tileSavedMu.Unlock()

	for i, tile := range tiles {
		if save[i] {
			// If WriteCache fails here (out of disk space? i/o error?),
			// c.tileSaved[tile] is still true and we will not try to write it again.
			// Next time we run maybe we'll redownload it again and be
			// more successful.
			c.ops.WriteCache(c.name+"/"+tile.Path(), data[i])
		}
	}
}