| // Copyright 2012 Google Inc. All rights reserved. |
| // Use of this source code is governed by the Apache 2.0 |
| // license that can be found in the LICENSE file. |
| |
| package search // import "google.golang.org/appengine/search" |
| |
| // TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage?? |
| // TODO: Index.GetAll (or Iterator.GetAll)? |
| // TODO: struct <-> protobuf tests. |
| // TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero |
| // time.Time)? _MAXIMUM_STRING_LENGTH? |
| |
| import ( |
| "errors" |
| "fmt" |
| "math" |
| "reflect" |
| "regexp" |
| "strconv" |
| "strings" |
| "time" |
| "unicode/utf8" |
| |
| "github.com/golang/protobuf/proto" |
| "golang.org/x/net/context" |
| |
| "google.golang.org/appengine" |
| "google.golang.org/appengine/internal" |
| pb "google.golang.org/appengine/internal/search" |
| ) |
| |
// maxDocumentsPerPutDelete is the largest number of documents accepted by a
// single PutMulti or DeleteMulti call.
const maxDocumentsPerPutDelete = 200

// ErrInvalidDocumentType is returned when methods like Put, Get or Next
// are passed a dst or src argument of invalid type.
var ErrInvalidDocumentType = errors.New("search: invalid document type")

// ErrNoSuchDocument is returned when no document was found for a given ID.
var ErrNoSuchDocument = errors.New("search: no such document")

// ErrTooManyDocuments is returned when the user passes more than
// maxDocumentsPerPutDelete documents to PutMulti or DeleteMulti.
var ErrTooManyDocuments = fmt.Errorf("search: too many documents given to put or delete (max is %d)", maxDocumentsPerPutDelete)
| |
type (
	// Atom is a document field whose contents are indexed as a single
	// indivisible string.
	Atom string

	// HTML is a document field whose contents are indexed as HTML. Only text
	// nodes are indexed: "foo<b>bar" will be treated as "foobar".
	HTML string
)
| |
// validIndexNameOrDocID reports whether s is acceptable as an index name or
// document ID: every rune must be a visible, printable ASCII character
// (0x21 through 0x7e) and s must not begin with "!". It is the Go equivalent
// of Python's _ValidateVisiblePrintableAsciiNotReserved.
//
// The empty string is accepted; callers that need a non-empty ID check that
// separately.
func validIndexNameOrDocID(s string) bool {
	for _, r := range s {
		if visible := 0x21 <= r && r < 0x7f; !visible {
			return false
		}
	}
	// A leading "!" is reserved.
	return !strings.HasPrefix(s, "!")
}
| |
// fieldNameRE matches an ASCII letter followed by any number of ASCII
// letters, digits or underscores.
var fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`)

// languageRE matches exactly two lowercase ASCII letters.
var languageRE = regexp.MustCompile(`^[a-z]{2}$`)
| |
| // validFieldName is the Go equivalent of Python's _CheckFieldName. It checks |
| // the validity of both field and facet names. |
| func validFieldName(s string) bool { |
| return len(s) <= 500 && fieldNameRE.MatchString(s) |
| } |
| |
// validDocRank reports whether the rank r lies in the range [0, 2^31).
func validDocRank(r int) bool {
	const maxRank = 1<<31 - 1
	if r < 0 {
		return false
	}
	return r <= maxRank
}
| |
| // validLanguage checks that a language looks like ISO 639-1. |
| func validLanguage(s string) bool { |
| return languageRE.MatchString(s) |
| } |
| |
// validFloat reports whether f lies in the range
// [-2147483647, 2147483647]. NaN and the infinities are rejected.
func validFloat(f float64) bool {
	const limit = 1<<31 - 1
	if f < -limit {
		return false
	}
	// A NaN fails this comparison too, so it is reported as invalid.
	return f <= limit
}
| |
| // Index is an index of documents. |
| type Index struct { |
| spec pb.IndexSpec |
| } |
| |
// orderIDEpoch is the reference instant (midnight UTC, 1 January 2011) for
// default OrderId values: saveDoc stores the number of seconds elapsed since
// this time on every document.
var orderIDEpoch = time.Date(2011, time.January, 1, 0, 0, 0, 0, time.UTC)
| |
| // Open opens the index with the given name. The index is created if it does |
| // not already exist. |
| // |
| // The name is a human-readable ASCII string. It must contain no whitespace |
| // characters and not start with "!". |
| func Open(name string) (*Index, error) { |
| if !validIndexNameOrDocID(name) { |
| return nil, fmt.Errorf("search: invalid index name %q", name) |
| } |
| return &Index{ |
| spec: pb.IndexSpec{ |
| Name: &name, |
| }, |
| }, nil |
| } |
| |
| // Put saves src to the index. If id is empty, a new ID is allocated by the |
| // service and returned. If id is not empty, any existing index entry for that |
| // ID is replaced. |
| // |
| // The ID is a human-readable ASCII string. It must contain no whitespace |
| // characters and not start with "!". |
| // |
| // src must be a non-nil struct pointer or implement the FieldLoadSaver |
| // interface. |
| func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) { |
| ids, err := x.PutMulti(c, []string{id}, []interface{}{src}) |
| if err != nil { |
| return "", err |
| } |
| return ids[0], nil |
| } |
| |
| // PutMulti is like Put, but is more efficient for adding multiple documents to |
| // the index at once. |
| // |
| // Up to 200 documents can be added at once. ErrTooManyDocuments is returned if |
| // you try to add more. |
| // |
| // ids can either be an empty slice (which means new IDs will be allocated for |
| // each of the documents added) or a slice the same size as srcs. |
| // |
| // The error may be an instance of appengine.MultiError, in which case it will |
| // be the same size as srcs and the individual errors inside will correspond |
| // with the items in srcs. |
| func (x *Index) PutMulti(c context.Context, ids []string, srcs []interface{}) ([]string, error) { |
| if len(ids) != 0 && len(srcs) != len(ids) { |
| return nil, fmt.Errorf("search: PutMulti expects ids and srcs slices of the same length") |
| } |
| if len(srcs) > maxDocumentsPerPutDelete { |
| return nil, ErrTooManyDocuments |
| } |
| |
| docs := make([]*pb.Document, len(srcs)) |
| for i, s := range srcs { |
| var err error |
| docs[i], err = saveDoc(s) |
| if err != nil { |
| return nil, err |
| } |
| |
| if len(ids) != 0 && ids[i] != "" { |
| if !validIndexNameOrDocID(ids[i]) { |
| return nil, fmt.Errorf("search: invalid ID %q", ids[i]) |
| } |
| docs[i].Id = proto.String(ids[i]) |
| } |
| } |
| |
| // spec is modified by Call when applying the current Namespace, so copy it to |
| // avoid retaining the namespace beyond the scope of the Call. |
| spec := x.spec |
| req := &pb.IndexDocumentRequest{ |
| Params: &pb.IndexDocumentParams{ |
| Document: docs, |
| IndexSpec: &spec, |
| }, |
| } |
| res := &pb.IndexDocumentResponse{} |
| if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil { |
| return nil, err |
| } |
| multiErr, hasErr := make(appengine.MultiError, len(res.Status)), false |
| for i, s := range res.Status { |
| if s.GetCode() != pb.SearchServiceError_OK { |
| multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) |
| hasErr = true |
| } |
| } |
| if hasErr { |
| return res.DocId, multiErr |
| } |
| |
| if len(res.Status) != len(docs) || len(res.DocId) != len(docs) { |
| return nil, fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs, expected %d)", |
| len(res.Status), len(res.DocId), len(docs)) |
| } |
| return res.DocId, nil |
| } |
| |
| // Get loads the document with the given ID into dst. |
| // |
| // The ID is a human-readable ASCII string. It must be non-empty, contain no |
| // whitespace characters and not start with "!". |
| // |
| // dst must be a non-nil struct pointer or implement the FieldLoadSaver |
| // interface. |
| // |
| // ErrFieldMismatch is returned when a field is to be loaded into a different |
| // type than the one it was stored from, or when a field is missing or |
| // unexported in the destination struct. ErrFieldMismatch is only returned if |
| // dst is a struct pointer. It is up to the callee to decide whether this error |
| // is fatal, recoverable, or ignorable. |
| func (x *Index) Get(c context.Context, id string, dst interface{}) error { |
| if id == "" || !validIndexNameOrDocID(id) { |
| return fmt.Errorf("search: invalid ID %q", id) |
| } |
| req := &pb.ListDocumentsRequest{ |
| Params: &pb.ListDocumentsParams{ |
| IndexSpec: &x.spec, |
| StartDocId: proto.String(id), |
| Limit: proto.Int32(1), |
| }, |
| } |
| res := &pb.ListDocumentsResponse{} |
| if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil { |
| return err |
| } |
| if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { |
| return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) |
| } |
| if len(res.Document) != 1 || res.Document[0].GetId() != id { |
| return ErrNoSuchDocument |
| } |
| return loadDoc(dst, res.Document[0], nil) |
| } |
| |
| // Delete deletes a document from the index. |
| func (x *Index) Delete(c context.Context, id string) error { |
| return x.DeleteMulti(c, []string{id}) |
| } |
| |
| // DeleteMulti deletes multiple documents from the index. |
| // |
| // The returned error may be an instance of appengine.MultiError, in which case |
| // it will be the same size as srcs and the individual errors inside will |
| // correspond with the items in srcs. |
| func (x *Index) DeleteMulti(c context.Context, ids []string) error { |
| if len(ids) > maxDocumentsPerPutDelete { |
| return ErrTooManyDocuments |
| } |
| |
| req := &pb.DeleteDocumentRequest{ |
| Params: &pb.DeleteDocumentParams{ |
| DocId: ids, |
| IndexSpec: &x.spec, |
| }, |
| } |
| res := &pb.DeleteDocumentResponse{} |
| if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil { |
| return err |
| } |
| if len(res.Status) != len(ids) { |
| return fmt.Errorf("search: internal error: wrong number of results (%d, expected %d)", |
| len(res.Status), len(ids)) |
| } |
| multiErr, hasErr := make(appengine.MultiError, len(ids)), false |
| for i, s := range res.Status { |
| if s.GetCode() != pb.SearchServiceError_OK { |
| multiErr[i] = fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail()) |
| hasErr = true |
| } |
| } |
| if hasErr { |
| return multiErr |
| } |
| return nil |
| } |
| |
| // List lists all of the documents in an index. The documents are returned in |
| // increasing ID order. |
| func (x *Index) List(c context.Context, opts *ListOptions) *Iterator { |
| t := &Iterator{ |
| c: c, |
| index: x, |
| count: -1, |
| listInclusive: true, |
| more: moreList, |
| } |
| if opts != nil { |
| t.listStartID = opts.StartID |
| t.limit = opts.Limit |
| t.idsOnly = opts.IDsOnly |
| } |
| return t |
| } |
| |
| func moreList(t *Iterator) error { |
| req := &pb.ListDocumentsRequest{ |
| Params: &pb.ListDocumentsParams{ |
| IndexSpec: &t.index.spec, |
| }, |
| } |
| if t.listStartID != "" { |
| req.Params.StartDocId = &t.listStartID |
| req.Params.IncludeStartDoc = &t.listInclusive |
| } |
| if t.limit > 0 { |
| req.Params.Limit = proto.Int32(int32(t.limit)) |
| } |
| if t.idsOnly { |
| req.Params.KeysOnly = &t.idsOnly |
| } |
| |
| res := &pb.ListDocumentsResponse{} |
| if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil { |
| return err |
| } |
| if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { |
| return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) |
| } |
| t.listRes = res.Document |
| t.listStartID, t.listInclusive, t.more = "", false, nil |
| if len(res.Document) != 0 && t.limit <= 0 { |
| if id := res.Document[len(res.Document)-1].GetId(); id != "" { |
| t.listStartID, t.more = id, moreList |
| } |
| } |
| return nil |
| } |
| |
// ListOptions are the options for listing documents in an index. Passing a nil
// *ListOptions is equivalent to using the default values.
type ListOptions struct {
	// StartID is the inclusive lower bound for the ID of the returned
	// documents. When empty, listing begins with the first document.
	StartID string

	// Limit is the maximum number of documents to return. Zero (the default)
	// means there is no limit.
	Limit int

	// IDsOnly requests that only document IDs be returned for the list
	// operation; no document fields are populated.
	IDsOnly bool
}
| |
| // Search searches the index for the given query. |
| func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator { |
| t := &Iterator{ |
| c: c, |
| index: x, |
| searchQuery: query, |
| more: moreSearch, |
| } |
| if opts != nil { |
| if opts.Cursor != "" { |
| if opts.Offset != 0 { |
| return errIter("at most one of Cursor and Offset may be specified") |
| } |
| t.searchCursor = proto.String(string(opts.Cursor)) |
| } |
| t.limit = opts.Limit |
| t.fields = opts.Fields |
| t.idsOnly = opts.IDsOnly |
| t.sort = opts.Sort |
| t.exprs = opts.Expressions |
| t.refinements = opts.Refinements |
| t.facetOpts = opts.Facets |
| t.searchOffset = opts.Offset |
| t.countAccuracy = opts.CountAccuracy |
| } |
| return t |
| } |
| |
| func moreSearch(t *Iterator) error { |
| // We use per-result (rather than single/per-page) cursors since this |
| // lets us return a Cursor for every iterator document. The two cursor |
| // types are largely interchangeable: a page cursor is the same as the |
| // last per-result cursor in a given search response. |
| req := &pb.SearchRequest{ |
| Params: &pb.SearchParams{ |
| IndexSpec: &t.index.spec, |
| Query: &t.searchQuery, |
| Cursor: t.searchCursor, |
| CursorType: pb.SearchParams_PER_RESULT.Enum(), |
| FieldSpec: &pb.FieldSpec{ |
| Name: t.fields, |
| }, |
| }, |
| } |
| if t.limit > 0 { |
| req.Params.Limit = proto.Int32(int32(t.limit)) |
| } |
| if t.searchOffset > 0 { |
| req.Params.Offset = proto.Int32(int32(t.searchOffset)) |
| t.searchOffset = 0 |
| } |
| if t.countAccuracy > 0 { |
| req.Params.MatchedCountAccuracy = proto.Int32(int32(t.countAccuracy)) |
| } |
| if t.idsOnly { |
| req.Params.KeysOnly = &t.idsOnly |
| } |
| if t.sort != nil { |
| if err := sortToProto(t.sort, req.Params); err != nil { |
| return err |
| } |
| } |
| if t.refinements != nil { |
| if err := refinementsToProto(t.refinements, req.Params); err != nil { |
| return err |
| } |
| } |
| for _, e := range t.exprs { |
| req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{ |
| Name: proto.String(e.Name), |
| Expression: proto.String(e.Expr), |
| }) |
| } |
| for _, f := range t.facetOpts { |
| if err := f.setParams(req.Params); err != nil { |
| return fmt.Errorf("bad FacetSearchOption: %v", err) |
| } |
| } |
| // Don't repeat facet search. |
| t.facetOpts = nil |
| |
| res := &pb.SearchResponse{} |
| if err := internal.Call(t.c, "search", "Search", req, res); err != nil { |
| return err |
| } |
| if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK { |
| return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail()) |
| } |
| t.searchRes = res.Result |
| if len(res.FacetResult) > 0 { |
| t.facetRes = res.FacetResult |
| } |
| t.count = int(*res.MatchedCount) |
| if t.limit > 0 { |
| t.more = nil |
| } else { |
| t.more = moreSearch |
| } |
| return nil |
| } |
| |
| // SearchOptions are the options for searching an index. Passing a nil |
| // *SearchOptions is equivalent to using the default values. |
| type SearchOptions struct { |
| // Limit is the maximum number of documents to return. The zero value |
| // indicates no limit. |
| Limit int |
| |
| // IDsOnly indicates that only document IDs should be returned for the search |
| // operation; no document fields are populated. |
| IDsOnly bool |
| |
| // Sort controls the ordering of search results. |
| Sort *SortOptions |
| |
| // Fields specifies which document fields to include in the results. If omitted, |
| // all document fields are returned. No more than 100 fields may be specified. |
| Fields []string |
| |
| // Expressions specifies additional computed fields to add to each returned |
| // document. |
| Expressions []FieldExpression |
| |
| // Facets controls what facet information is returned for these search results. |
| // If no options are specified, no facet results will be returned. |
| Facets []FacetSearchOption |
| |
| // Refinements filters the returned documents by requiring them to contain facets |
| // with specific values. Refinements are applied in conjunction for facets with |
| // different names, and in disjunction otherwise. |
| Refinements []Facet |
| |
| // Cursor causes the results to commence with the first document after |
| // the document associated with the cursor. |
| Cursor Cursor |
| |
| // Offset specifies the number of documents to skip over before returning results. |
| // When specified, Cursor must be nil. |
| Offset int |
| |
| // CountAccuracy specifies the maximum result count that can be expected to |
| // be accurate. If zero, the count accuracy defaults to 20. |
| CountAccuracy int |
| } |
| |
// Cursor represents an iterator's position within a set of search results.
//
// The string value of a cursor is web-safe. It can be saved and restored
// for later use. The empty Cursor means that no position is set.
type Cursor string
| |
// FieldExpression defines a custom expression to evaluate for each result.
type FieldExpression struct {
	// Name is the name given to the computed field.
	Name string

	// Expr is the expression evaluated for each document; it can provide,
	// for example, a custom content snippet.
	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
	// the supported expression syntax.
	Expr string
}
| |
| // FacetSearchOption controls what facet information is returned in search results. |
| type FacetSearchOption interface { |
| setParams(*pb.SearchParams) error |
| } |
| |
| // AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet |
| // discovery for the search. Automatic facet discovery looks for the facets |
| // which appear the most often in the aggregate in the matched documents. |
| // |
| // The maximum number of facets returned is controlled by facetLimit, and the |
| // maximum number of values per facet by facetLimit. A limit of zero indicates |
| // a default limit should be used. |
| func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption { |
| return &autoFacetOpt{facetLimit, valueLimit} |
| } |
| |
| type autoFacetOpt struct { |
| facetLimit, valueLimit int |
| } |
| |
| const defaultAutoFacetLimit = 10 // As per python runtime search.py. |
| |
| func (o *autoFacetOpt) setParams(params *pb.SearchParams) error { |
| lim := int32(o.facetLimit) |
| if lim == 0 { |
| lim = defaultAutoFacetLimit |
| } |
| params.AutoDiscoverFacetCount = &lim |
| if o.valueLimit > 0 { |
| params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{ |
| ValueLimit: proto.Int32(int32(o.valueLimit)), |
| } |
| } |
| return nil |
| } |
| |
| // FacetDiscovery returns a FacetSearchOption which selects a facet to be |
| // returned with the search results. By default, the most frequently |
| // occurring values for that facet will be returned. However, you can also |
| // specify a list of particular Atoms or specific Ranges to return. |
| func FacetDiscovery(name string, value ...interface{}) FacetSearchOption { |
| return &facetOpt{name, value} |
| } |
| |
| type facetOpt struct { |
| name string |
| values []interface{} |
| } |
| |
| func (o *facetOpt) setParams(params *pb.SearchParams) error { |
| req := &pb.FacetRequest{Name: &o.name} |
| params.IncludeFacet = append(params.IncludeFacet, req) |
| if len(o.values) == 0 { |
| return nil |
| } |
| vtype := reflect.TypeOf(o.values[0]) |
| reqParam := &pb.FacetRequestParam{} |
| for _, v := range o.values { |
| if reflect.TypeOf(v) != vtype { |
| return errors.New("values must all be Atom, or must all be Range") |
| } |
| switch v := v.(type) { |
| case Atom: |
| reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v)) |
| case Range: |
| rng, err := rangeToProto(v) |
| if err != nil { |
| return fmt.Errorf("invalid range: %v", err) |
| } |
| reqParam.Range = append(reqParam.Range, rng) |
| default: |
| return fmt.Errorf("unsupported value type %T", v) |
| } |
| } |
| req.Params = reqParam |
| return nil |
| } |
| |
| // FacetDocumentDepth returns a FacetSearchOption which controls the number of |
| // documents to be evaluated with preparing facet results. |
| func FacetDocumentDepth(depth int) FacetSearchOption { |
| return facetDepthOpt(depth) |
| } |
| |
| type facetDepthOpt int |
| |
| func (o facetDepthOpt) setParams(params *pb.SearchParams) error { |
| params.FacetDepth = proto.Int32(int32(o)) |
| return nil |
| } |
| |
| // FacetResult represents the number of times a particular facet and value |
| // appeared in the documents matching a search request. |
| type FacetResult struct { |
| Facet |
| |
| // Count is the number of times this specific facet and value appeared in the |
| // matching documents. |
| Count int |
| } |
| |
// Range represents a numeric range with inclusive start and exclusive end.
// Start may be specified as math.Inf(-1) to indicate there is no minimum
// value, and End may similarly be specified as math.Inf(1); at least one of
// Start or End must be a finite number.
type Range struct {
	Start float64
	End   float64
}
| |
// negInf and posInf are the infinities that mark a Range as having no
// minimum or no maximum, respectively.
var negInf, posInf = math.Inf(-1), math.Inf(1)
| |
| // AtLeast returns a Range matching any value greater than, or equal to, min. |
| func AtLeast(min float64) Range { |
| return Range{Start: min, End: posInf} |
| } |
| |
| // LessThan returns a Range matching any value less than max. |
| func LessThan(max float64) Range { |
| return Range{Start: negInf, End: max} |
| } |
| |
| // SortOptions control the ordering and scoring of search results. |
| type SortOptions struct { |
| // Expressions is a slice of expressions representing a multi-dimensional |
| // sort. |
| Expressions []SortExpression |
| |
| // Scorer, when specified, will cause the documents to be scored according to |
| // search term frequency. |
| Scorer Scorer |
| |
| // Limit is the maximum number of objects to score and/or sort. Limit cannot |
| // be more than 10,000. The zero value indicates a default limit. |
| Limit int |
| } |
| |
// SortExpression defines a single dimension for sorting a document.
type SortExpression struct {
	// Expr is evaluated to provide a sorting value for each document.
	// See https://cloud.google.com/appengine/docs/standard/go/search/options for
	// the supported expression syntax.
	Expr string

	// Reverse causes the documents to be sorted in ascending order.
	Reverse bool

	// Default is the value to use when no field is present or the expression
	// cannot be calculated for a document. For text sorts, Default must
	// be of type string; for numeric sorts, float64.
	Default interface{}
}
| |
| // A Scorer defines how a document is scored. |
| type Scorer interface { |
| toProto(*pb.ScorerSpec) |
| } |
| |
| type enumScorer struct { |
| enum pb.ScorerSpec_Scorer |
| } |
| |
| func (e enumScorer) toProto(spec *pb.ScorerSpec) { |
| spec.Scorer = e.enum.Enum() |
| } |
| |
| var ( |
| // MatchScorer assigns a score based on term frequency in a document. |
| MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER} |
| |
| // RescoringMatchScorer assigns a score based on the quality of the query |
| // match. It is similar to a MatchScorer but uses a more complex scoring |
| // algorithm based on match term frequency and other factors like field type. |
| // Please be aware that this algorithm is continually refined and can change |
| // over time without notice. This means that the ordering of search results |
| // that use this scorer can also change without notice. |
| RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER} |
| ) |
| |
| func sortToProto(sort *SortOptions, params *pb.SearchParams) error { |
| for _, e := range sort.Expressions { |
| spec := &pb.SortSpec{ |
| SortExpression: proto.String(e.Expr), |
| } |
| if e.Reverse { |
| spec.SortDescending = proto.Bool(false) |
| } |
| if e.Default != nil { |
| switch d := e.Default.(type) { |
| case float64: |
| spec.DefaultValueNumeric = &d |
| case string: |
| spec.DefaultValueText = &d |
| default: |
| return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr) |
| } |
| } |
| params.SortSpec = append(params.SortSpec, spec) |
| } |
| |
| spec := &pb.ScorerSpec{} |
| if sort.Limit > 0 { |
| spec.Limit = proto.Int32(int32(sort.Limit)) |
| params.ScorerSpec = spec |
| } |
| if sort.Scorer != nil { |
| sort.Scorer.toProto(spec) |
| params.ScorerSpec = spec |
| } |
| |
| return nil |
| } |
| |
| func refinementsToProto(refinements []Facet, params *pb.SearchParams) error { |
| for _, r := range refinements { |
| ref := &pb.FacetRefinement{ |
| Name: proto.String(r.Name), |
| } |
| switch v := r.Value.(type) { |
| case Atom: |
| ref.Value = proto.String(string(v)) |
| case Range: |
| rng, err := rangeToProto(v) |
| if err != nil { |
| return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err) |
| } |
| // Unfortunately there are two identical messages for identify Facet ranges. |
| ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End} |
| default: |
| return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v) |
| } |
| params.FacetRefinement = append(params.FacetRefinement, ref) |
| } |
| return nil |
| } |
| |
| func rangeToProto(r Range) (*pb.FacetRange, error) { |
| rng := &pb.FacetRange{} |
| if r.Start != negInf { |
| if !validFloat(r.Start) { |
| return nil, errors.New("invalid value for Start") |
| } |
| rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64)) |
| } else if r.End == posInf { |
| return nil, errors.New("either Start or End must be finite") |
| } |
| if r.End != posInf { |
| if !validFloat(r.End) { |
| return nil, errors.New("invalid value for End") |
| } |
| rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64)) |
| } |
| return rng, nil |
| } |
| |
| func protoToRange(rng *pb.FacetRefinement_Range) Range { |
| r := Range{Start: negInf, End: posInf} |
| if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil { |
| r.Start = x |
| } |
| if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil { |
| r.End = x |
| } |
| return r |
| } |
| |
| // Iterator is the result of searching an index for a query or listing an |
| // index. |
| type Iterator struct { |
| c context.Context |
| index *Index |
| err error |
| |
| listRes []*pb.Document |
| listStartID string |
| listInclusive bool |
| |
| searchRes []*pb.SearchResult |
| facetRes []*pb.FacetResult |
| searchQuery string |
| searchCursor *string |
| searchOffset int |
| sort *SortOptions |
| |
| fields []string |
| exprs []FieldExpression |
| refinements []Facet |
| facetOpts []FacetSearchOption |
| |
| more func(*Iterator) error |
| |
| count int |
| countAccuracy int |
| limit int // items left to return; 0 for unlimited. |
| idsOnly bool |
| } |
| |
| // errIter returns an iterator that only returns the given error. |
| func errIter(err string) *Iterator { |
| return &Iterator{ |
| err: errors.New(err), |
| } |
| } |
| |
// Done is the error returned by Iterator.Next once a query iteration has
// completed and there are no more results.
var Done = errors.New("search: query has no more results")
| |
| // Count returns an approximation of the number of documents matched by the |
| // query. It is only valid to call for iterators returned by Search. |
| func (t *Iterator) Count() int { return t.count } |
| |
| // fetchMore retrieves more results, if there are no errors or pending results. |
| func (t *Iterator) fetchMore() { |
| if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil { |
| t.err = t.more(t) |
| } |
| } |
| |
| // Next returns the ID of the next result. When there are no more results, |
| // Done is returned as the error. |
| // |
| // dst must be a non-nil struct pointer, implement the FieldLoadSaver |
| // interface, or be a nil interface value. If a non-nil dst is provided, it |
| // will be filled with the indexed fields. dst is ignored if this iterator was |
| // created with an IDsOnly option. |
| func (t *Iterator) Next(dst interface{}) (string, error) { |
| t.fetchMore() |
| if t.err != nil { |
| return "", t.err |
| } |
| |
| var doc *pb.Document |
| var exprs []*pb.Field |
| switch { |
| case len(t.listRes) != 0: |
| doc = t.listRes[0] |
| t.listRes = t.listRes[1:] |
| case len(t.searchRes) != 0: |
| doc = t.searchRes[0].Document |
| exprs = t.searchRes[0].Expression |
| t.searchCursor = t.searchRes[0].Cursor |
| t.searchRes = t.searchRes[1:] |
| default: |
| return "", Done |
| } |
| if doc == nil { |
| return "", errors.New("search: internal error: no document returned") |
| } |
| if !t.idsOnly && dst != nil { |
| if err := loadDoc(dst, doc, exprs); err != nil { |
| return "", err |
| } |
| } |
| return doc.GetId(), nil |
| } |
| |
| // Cursor returns the cursor associated with the current document (that is, |
| // the document most recently returned by a call to Next). |
| // |
| // Passing this cursor in a future call to Search will cause those results |
| // to commence with the first document after the current document. |
| func (t *Iterator) Cursor() Cursor { |
| if t.searchCursor == nil { |
| return "" |
| } |
| return Cursor(*t.searchCursor) |
| } |
| |
| // Facets returns the facets found within the search results, if any facets |
| // were requested in the SearchOptions. |
| func (t *Iterator) Facets() ([][]FacetResult, error) { |
| t.fetchMore() |
| if t.err != nil && t.err != Done { |
| return nil, t.err |
| } |
| |
| var facets [][]FacetResult |
| for _, f := range t.facetRes { |
| fres := make([]FacetResult, 0, len(f.Value)) |
| for _, v := range f.Value { |
| ref := v.Refinement |
| facet := FacetResult{ |
| Facet: Facet{Name: ref.GetName()}, |
| Count: int(v.GetCount()), |
| } |
| if ref.Value != nil { |
| facet.Value = Atom(*ref.Value) |
| } else { |
| facet.Value = protoToRange(ref.Range) |
| } |
| fres = append(fres, facet) |
| } |
| facets = append(facets, fres) |
| } |
| return facets, nil |
| } |
| |
| // saveDoc converts from a struct pointer or |
| // FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf. |
| func saveDoc(src interface{}) (*pb.Document, error) { |
| var err error |
| var fields []Field |
| var meta *DocumentMetadata |
| switch x := src.(type) { |
| case FieldLoadSaver: |
| fields, meta, err = x.Save() |
| default: |
| fields, meta, err = saveStructWithMeta(src) |
| } |
| if err != nil { |
| return nil, err |
| } |
| |
| fieldsProto, err := fieldsToProto(fields) |
| if err != nil { |
| return nil, err |
| } |
| d := &pb.Document{ |
| Field: fieldsProto, |
| OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())), |
| OrderIdSource: pb.Document_DEFAULTED.Enum(), |
| } |
| if meta != nil { |
| if meta.Rank != 0 { |
| if !validDocRank(meta.Rank) { |
| return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank) |
| } |
| *d.OrderId = int32(meta.Rank) |
| d.OrderIdSource = pb.Document_SUPPLIED.Enum() |
| } |
| if len(meta.Facets) > 0 { |
| facets, err := facetsToProto(meta.Facets) |
| if err != nil { |
| return nil, err |
| } |
| d.Facet = facets |
| } |
| } |
| return d, nil |
| } |
| |
| func fieldsToProto(src []Field) ([]*pb.Field, error) { |
| // Maps to catch duplicate time or numeric fields. |
| timeFields, numericFields := make(map[string]bool), make(map[string]bool) |
| dst := make([]*pb.Field, 0, len(src)) |
| for _, f := range src { |
| if !validFieldName(f.Name) { |
| return nil, fmt.Errorf("search: invalid field name %q", f.Name) |
| } |
| fieldValue := &pb.FieldValue{} |
| switch x := f.Value.(type) { |
| case string: |
| fieldValue.Type = pb.FieldValue_TEXT.Enum() |
| fieldValue.StringValue = proto.String(x) |
| case Atom: |
| fieldValue.Type = pb.FieldValue_ATOM.Enum() |
| fieldValue.StringValue = proto.String(string(x)) |
| case HTML: |
| fieldValue.Type = pb.FieldValue_HTML.Enum() |
| fieldValue.StringValue = proto.String(string(x)) |
| case time.Time: |
| if timeFields[f.Name] { |
| return nil, fmt.Errorf("search: duplicate time field %q", f.Name) |
| } |
| timeFields[f.Name] = true |
| fieldValue.Type = pb.FieldValue_DATE.Enum() |
| fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10)) |
| case float64: |
| if numericFields[f.Name] { |
| return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name) |
| } |
| if !validFloat(x) { |
| return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x) |
| } |
| numericFields[f.Name] = true |
| fieldValue.Type = pb.FieldValue_NUMBER.Enum() |
| fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) |
| case appengine.GeoPoint: |
| if !x.Valid() { |
| return nil, fmt.Errorf( |
| "search: GeoPoint field %q with invalid value %v", |
| f.Name, x) |
| } |
| fieldValue.Type = pb.FieldValue_GEO.Enum() |
| fieldValue.Geo = &pb.FieldValue_Geo{ |
| Lat: proto.Float64(x.Lat), |
| Lng: proto.Float64(x.Lng), |
| } |
| default: |
| return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value)) |
| } |
| if f.Language != "" { |
| switch f.Value.(type) { |
| case string, HTML: |
| if !validLanguage(f.Language) { |
| return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language) |
| } |
| fieldValue.Language = proto.String(f.Language) |
| default: |
| return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value) |
| } |
| } |
| if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) { |
| return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p) |
| } |
| dst = append(dst, &pb.Field{ |
| Name: proto.String(f.Name), |
| Value: fieldValue, |
| }) |
| } |
| return dst, nil |
| } |
| |
| func facetsToProto(src []Facet) ([]*pb.Facet, error) { |
| dst := make([]*pb.Facet, 0, len(src)) |
| for _, f := range src { |
| if !validFieldName(f.Name) { |
| return nil, fmt.Errorf("search: invalid facet name %q", f.Name) |
| } |
| facetValue := &pb.FacetValue{} |
| switch x := f.Value.(type) { |
| case Atom: |
| if !utf8.ValidString(string(x)) { |
| return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x) |
| } |
| facetValue.Type = pb.FacetValue_ATOM.Enum() |
| facetValue.StringValue = proto.String(string(x)) |
| case float64: |
| if !validFloat(x) { |
| return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x) |
| } |
| facetValue.Type = pb.FacetValue_NUMBER.Enum() |
| facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64)) |
| default: |
| return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value)) |
| } |
| dst = append(dst, &pb.Facet{ |
| Name: proto.String(f.Name), |
| Value: facetValue, |
| }) |
| } |
| return dst, nil |
| } |
| |
| // loadDoc converts from protobufs to a struct pointer or |
| // FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's |
| // stored fields and facets, and any document metadata. An additional slice of |
| // fields, exprs, may optionally be provided to contain any derived expressions |
| // requested by the developer. |
| func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) { |
| fields, err := protoToFields(src.Field) |
| if err != nil { |
| return err |
| } |
| facets, err := protoToFacets(src.Facet) |
| if err != nil { |
| return err |
| } |
| if len(exprs) > 0 { |
| exprFields, err := protoToFields(exprs) |
| if err != nil { |
| return err |
| } |
| // Mark each field as derived. |
| for i := range exprFields { |
| exprFields[i].Derived = true |
| } |
| fields = append(fields, exprFields...) |
| } |
| meta := &DocumentMetadata{ |
| Rank: int(src.GetOrderId()), |
| Facets: facets, |
| } |
| switch x := dst.(type) { |
| case FieldLoadSaver: |
| return x.Load(fields, meta) |
| default: |
| return loadStructWithMeta(dst, fields, meta) |
| } |
| } |
| |
| func protoToFields(fields []*pb.Field) ([]Field, error) { |
| dst := make([]Field, 0, len(fields)) |
| for _, field := range fields { |
| fieldValue := field.GetValue() |
| f := Field{ |
| Name: field.GetName(), |
| } |
| switch fieldValue.GetType() { |
| case pb.FieldValue_TEXT: |
| f.Value = fieldValue.GetStringValue() |
| f.Language = fieldValue.GetLanguage() |
| case pb.FieldValue_ATOM: |
| f.Value = Atom(fieldValue.GetStringValue()) |
| case pb.FieldValue_HTML: |
| f.Value = HTML(fieldValue.GetStringValue()) |
| f.Language = fieldValue.GetLanguage() |
| case pb.FieldValue_DATE: |
| sv := fieldValue.GetStringValue() |
| millis, err := strconv.ParseInt(sv, 10, 64) |
| if err != nil { |
| return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err) |
| } |
| f.Value = time.Unix(0, millis*1e6) |
| case pb.FieldValue_NUMBER: |
| sv := fieldValue.GetStringValue() |
| x, err := strconv.ParseFloat(sv, 64) |
| if err != nil { |
| return nil, err |
| } |
| f.Value = x |
| case pb.FieldValue_GEO: |
| geoValue := fieldValue.GetGeo() |
| geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()} |
| if !geoPoint.Valid() { |
| return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint) |
| } |
| f.Value = geoPoint |
| default: |
| return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType()) |
| } |
| dst = append(dst, f) |
| } |
| return dst, nil |
| } |
| |
| func protoToFacets(facets []*pb.Facet) ([]Facet, error) { |
| if len(facets) == 0 { |
| return nil, nil |
| } |
| dst := make([]Facet, 0, len(facets)) |
| for _, facet := range facets { |
| facetValue := facet.GetValue() |
| f := Facet{ |
| Name: facet.GetName(), |
| } |
| switch facetValue.GetType() { |
| case pb.FacetValue_ATOM: |
| f.Value = Atom(facetValue.GetStringValue()) |
| case pb.FacetValue_NUMBER: |
| sv := facetValue.GetStringValue() |
| x, err := strconv.ParseFloat(sv, 64) |
| if err != nil { |
| return nil, err |
| } |
| f.Value = x |
| default: |
| return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType()) |
| } |
| dst = append(dst, f) |
| } |
| return dst, nil |
| } |
| |
| func namespaceMod(m proto.Message, namespace string) { |
| set := func(s **string) { |
| if *s == nil { |
| *s = &namespace |
| } |
| } |
| switch m := m.(type) { |
| case *pb.IndexDocumentRequest: |
| set(&m.Params.IndexSpec.Namespace) |
| case *pb.ListDocumentsRequest: |
| set(&m.Params.IndexSpec.Namespace) |
| case *pb.DeleteDocumentRequest: |
| set(&m.Params.IndexSpec.Namespace) |
| case *pb.SearchRequest: |
| set(&m.Params.IndexSpec.Namespace) |
| } |
| } |
| |
| func init() { |
| internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name) |
| internal.NamespaceMods["search"] = namespaceMod |
| } |