| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package worker |
| |
| import ( |
| "context" |
| "database/sql" |
| "errors" |
| "fmt" |
| "net/http" |
| "time" |
| |
| "github.com/go-redis/redis/v7" |
| "golang.org/x/pkgsite/internal/complete" |
| "golang.org/x/pkgsite/internal/database" |
| "golang.org/x/pkgsite/internal/derrors" |
| "golang.org/x/pkgsite/internal/log" |
| ) |
| |
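// popularCutoff is the minimum number of importers at which a package is
// considered popular for the purposes of the autocompletion indexes.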
| const popularCutoff = 50 |
| |
// handleUpdateRedisIndexes scans the search documents in the database, and
// updates the redis autocompletion indexes with data from those documents.
| func (s *Server) handleUpdateRedisIndexes(w http.ResponseWriter, r *http.Request) error { |
| ctx := r.Context() |
| err := updateRedisIndexes(ctx, s.db.Underlying(), s.redisHAClient, popularCutoff) |
| if err != nil { |
| return err |
| } |
| fmt.Fprint(w, "OK") |
| return nil |
| } |
| |
| // updateRedisIndexes updates redisClient with autocompletion data from db. |
| // cutoff specifies the number of importers at which a package is considered |
// popular, and is passed in as an argument to facilitate testing.
| func updateRedisIndexes(ctx context.Context, db *database.DB, redisClient *redis.Client, cutoff int) (err error) { |
| defer derrors.Wrap(&err, "updateRedisIndexes") |
| if redisClient == nil { |
| return errors.New("redis HA client is nil") |
| } |
| |
| // For autocompletion, we track two separate "indexes" (sorted sets of |
| // package path suffixes): one for popular packages, and one for the |
| // remainder, as defined by the popularCutoff const. This allows us to |
	// suggest popular completions, even when the user input is short (e.g. we
	// want to suggest 'fmt' when the user types 'f', but don't want to scan all
	// completions that start with the letter 'f').
| // |
| // This function scans search documents in the database and builds up a |
| // pipeline that writes these two sorted sets to Redis, using timestamped |
| // temporary keys, and then renames them to the keys used by the frontend for |
| // autocompletion. |
| // |
	// See https://redis.io/commands/rename for more information on renaming:
	// it's unclear from the documentation whether RENAME is atomic with
	// respect to readers, but it matters little here. Populating these indexes
	// currently takes 1-2 minutes, and renaming takes 1-2 seconds. Even if
	// completions are broken during those 1-2 seconds, that is preferable to
	// having them broken for 1-2 minutes. We could do something more clever,
	// such as updating the completion data in place using ZREMRANGEBYLEX
	// followed by ZADD, but that would be significantly more complicated (see
	// the illustrative updateEntriesInPlace sketch after this function).
| // |
	// One additional concern with this operation is that we temporarily double
	// the size of our redis database while we stage the new completion data.
	// That's fine on its own, but it would be dangerous if, due to a bug, this
	// operation were either not cleaned up properly or run concurrently. In
	// light of this, we first look for evidence of another update operation
	// currently running, by scanning Redis for keys that match the temporary
	// key pattern.
| |
| // Check for an ongoing update operation, as described above. |
| tempKeyPattern := fmt.Sprintf("%s*-*", complete.KeyPrefix) |
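	// Note that a single SCAN iteration with a small COUNT hint is only a
	// best-effort check: SCAN guarantees full coverage only across a complete
	// cursor iteration, so a matching key could be missed here.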
| existing, _, err := redisClient.Scan(0, tempKeyPattern, 1).Result() |
| if err != nil { |
| return fmt.Errorf(`redis error: Scan(%q): %v`, tempKeyPattern, err) |
| } |
| if len(existing) > 0 { |
| return fmt.Errorf("found existing in-progress completion index: %v", existing[0]) |
| } |
| |
	// Use temporary timestamped keys while we write the completion data, as it
	// can take ~minutes. Call time.Now() once so that both keys share the same
	// timestamp suffix.
	ts := time.Now().Format(time.RFC3339)
	keyPop := fmt.Sprintf("%s-%s", complete.PopularKey, ts)
	keyRem := fmt.Sprintf("%s-%s", complete.RemainingKey, ts)
| |
	// Always clean up: DEL succeeds (deleting nothing) even if the keys have
	// already been renamed away.
| defer func() { |
| if _, err := redisClient.Del(keyPop).Result(); err != nil { |
| log.Errorf(ctx, "redisClient.Del(%q): %v", keyPop, err) |
| } |
| if _, err := redisClient.Del(keyRem).Result(); err != nil { |
| log.Errorf(ctx, "redisClient.Del(%q): %v", keyRem, err) |
| } |
| }() |
| |
	// pipeSize tracks the number of sorted-set entries queued in the pipe
	// (the total number of ZADD members, not the number of ZADD commands).
| pipeSize := 0 |
| pipe := redisClient.Pipeline() |
| defer pipe.Close() |
| // flush executes the current pipeline and resets its state. |
| flush := func() error { |
| log.Infof(ctx, "Writing completion data pipeline of size %d.", pipeSize) |
| if _, err := pipe.ExecContext(ctx); err != nil { |
| return fmt.Errorf("redis error: pipe.Exec: %v", err) |
| } |
		// As of writing this is unnecessary, as ExecContext resets the pipeline
		// commands, but since that behavior is not documented we Discard
		// explicitly.
| pipe.Discard() |
| pipeSize = 0 |
| return nil |
| } |
| // Track whether or not we have any entries in the popular or remaining |
| // indexes. This is an edge case, but if we don't insert any entries for a |
| // given index the key won't exist and we'll get an error when renaming. |
| var ( |
| haveRemaining bool |
| havePopular bool |
| ) |
| // As of writing there were around 5M entries in our index, so writing in |
| // batches of 1M should result in ~6 batches. |
| const batchSize = 1e6 |
| // processRow builds up a Redis pipeline as we scan the search_documents |
| // table. |
| processRow := func(rows *sql.Rows) error { |
| var partial complete.Completion |
| if err := rows.Scan(&partial.PackagePath, &partial.ModulePath, &partial.Version, &partial.Importers); err != nil { |
| return fmt.Errorf("rows.Scan: %v", err) |
| } |
| cmpls := complete.PathCompletions(partial) |
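		// Z.Score is left at zero for every member, so Redis orders each set
		// lexicographically by member, which is what makes prefix lookups
		// (e.g. via ZRANGEBYLEX) against these indexes work.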
| var zs []*redis.Z |
| for _, cmpl := range cmpls { |
| zs = append(zs, &redis.Z{Member: cmpl.Encode()}) |
| } |
| switch { |
| case partial.Importers >= cutoff: |
| havePopular = true |
| pipe.ZAdd(keyPop, zs...) |
| default: |
| haveRemaining = true |
| pipe.ZAdd(keyRem, zs...) |
| } |
| pipeSize += len(zs) |
| if pipeSize > batchSize { |
| if err := flush(); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
	// Here we use the *database.DB directly, rather than a method on
	// postgres.DB, so that we can write to our redis pipeline while we stream
	// results from the DB. Otherwise, we would have to:
	//  - add a method on postgres.DB for the trivial query below,
	//  - add a type (or reuse SearchResult) to hold the subset of search
	//    document data used here, and
	//  - hold two copies of all search results in memory while building the
	//    redis pipeline.
| query := ` |
| SELECT package_path, module_path, version, imported_by_count |
| FROM search_documents` |
| if err := db.RunQuery(ctx, query, processRow); err != nil { |
| return err |
| } |
| if err := flush(); err != nil { |
| return err |
| } |
| if havePopular { |
| log.Infof(ctx, "Renaming %q to %q", keyPop, complete.PopularKey) |
| if _, err := redisClient.Rename(keyPop, complete.PopularKey).Result(); err != nil { |
| return fmt.Errorf(`redis error: Rename(%q, %q): %v`, keyPop, complete.PopularKey, err) |
| } |
| } |
| if haveRemaining { |
| log.Infof(ctx, "Renaming %q to %q", keyRem, complete.RemainingKey) |
| if _, err := redisClient.Rename(keyRem, complete.RemainingKey).Result(); err != nil { |
| return fmt.Errorf(`redis error: Rename(%q, %q): %v`, keyRem, complete.RemainingKey, err) |
| } |
| } |
| return nil |
| } |