internal/gaby: add /embed endpoint to sync only embeddings

Change-Id: If50ee6ef956a894090f134cdd9f3519f784edd12
Reviewed-on: https://go-review.googlesource.com/c/oscar/+/708878
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com>
diff --git a/internal/embeddocs/sync.go b/internal/embeddocs/sync.go
index 1c193f3..3430c8e 100644
--- a/internal/embeddocs/sync.go
+++ b/internal/embeddocs/sync.go
@@ -59,15 +59,22 @@
 		return nil
 	}
 
+	var start, end string
 	for d := range w.Recent() {
-		lg.Debug("embeddocs sync start", "model", model, "doc", d.ID)
+		if start == "" {
+			start = d.ID
+		}
+		end = d.ID
 		batch = append(batch, llm.EmbedDoc{Title: d.Title, Text: d.Text})
 		ids = append(ids, d.ID)
 		batchLast = d.DBTime
 		if len(batch) >= batchSize {
+			lg.Debug("embeddocs sync flush", "model", model, "start", start, "end", end)
 			if err := flush(); err != nil {
 				return err
 			}
+			start = ""
+			end = ""
 		}
 	}
 	if len(batch) > 0 {
@@ -75,6 +82,7 @@
 		// which has to be called during an iteration over w.Recent.
 		// Start a new iteration just to call flush and then break out.
 		for _ = range w.Recent() {
+			lg.Debug("embeddocs sync flush", "model", model, "start", start, "end", end)
 			if err := flush(); err != nil {
 				return err
 			}
diff --git a/internal/gaby/main.go b/internal/gaby/main.go
index d2ef278..a7cccb1 100644
--- a/internal/gaby/main.go
+++ b/internal/gaby/main.go
@@ -540,10 +540,12 @@
 		setLevelEndpoint    = "setlevel"
 		githubEventEndpoint = "github-event"
 		crawlEndpoint       = "crawl"
+		embedEndpoint       = "embed"
 		bisectEndpoint      = "bisect"
 	)
 	cronEndpointCounter := g.newEndpointCounter(cronEndpoint)
 	crawlEndpointCounter := g.newEndpointCounter(crawlEndpoint)
+	embedEndpointCounter := g.newEndpointCounter(embedEndpoint)
 	githubEventEndpointCounter := g.newEndpointCounter(githubEventEndpoint)
 
 	mux := http.NewServeMux()
@@ -586,6 +588,23 @@
 		cronEndpointCounter.Add(r.Context(), 1)
 	})
 
+	// embedEndpoint (GET /embed) is meant to be called by hand
+	// when running a binary with a new embedding database configured,
+	// to backfill embeddings by running g.embedAll; errors yield a 500.
+	mux.HandleFunc("GET /"+embedEndpoint, func(w http.ResponseWriter, r *http.Request) {
+		g.slog.Info(embedEndpoint + " start")
+		defer g.slog.Info(embedEndpoint + " end")
+
+		// No lock here - g.embedAll already locks. It runs under g.ctx, not r.Context().
+
+		if err := g.embedAll(g.ctx); err != nil {
+			report(err, r)
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+		}
+
+		embedEndpointCounter.Add(r.Context(), 1) // counted even when embedAll failed
+	})
+
 	// crawlEndpoint triggers the web crawl configured in [Gaby.crawler].
 	// It is intended to be triggered by a Cloud Scheduler job (or similar)
 	// to run periodically.