blob: fa3e3d2ad50d3df70118360a101ebbaa76303e35 [file] [log] [blame]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file deals with preparing a schema for validation, including various checks,
// optimizations, and the resolution of cross-schema references.
package jsonschema
import (
"errors"
"fmt"
"net/url"
"regexp"
"strings"
)
// A Resolved consists of a [Schema] along with associated information needed to
// validate documents against it.
// A Resolved has passed the package's well-formedness checks (validation against
// the meta-schema itself is still a TODO in checkLocal), and all its references
// (the $ref and $dynamicRef keywords) have been resolved to their referenced Schemas.
// Call [Schema.Resolve] to obtain a Resolved from a Schema.
type Resolved struct {
// root is the schema that was resolved.
root *Schema
// resolvedURIs maps URI strings to the schemas within root that they identify:
// the base URI root was resolved against, plus the resolved $id of every
// schema that declares one.
resolvedURIs map[string]*Schema
}
// A Loader reads and unmarshals the schema at uri, if any.
//
// During resolution it is called with the fragment-less URI of a reference that
// is found neither in the schema being resolved nor among the schemas already
// loaded. A non-nil error aborts resolution and is reported wrapped with the URI
// that was being loaded.
type Loader func(uri *url.URL) (*Schema, error)
// Resolve readies root for validation by checking it and resolving every
// reference it contains.
//
// baseURI is either empty or an absolute URI (one that starts with a scheme).
// It is resolved (in the URI sense; see [url.URL.ResolveReference]) with root's
// $id property. If the resulting URI is not absolute, then the schema cannot
// contain relative URI references.
//
// loader fetches schemas that a $ref points to but that do not live under root
// (remote references). When loader is nil, any remote reference makes Resolve
// fail with an error.
func (root *Schema) Resolve(baseURI string, loader Loader) (*Resolved, error) {
	// Preparing a schema for validation takes four steps.
	//
	//  1. Load: read the schema from somewhere and unmarshal it. root itself may
	//     have been loaded or built in memory; any other schema that shows up in
	//     step 4 is obtained through the given loader.
	//  2. Check: perform well-formedness checks on the schema, precomputing some
	//     values along the way.
	//  3. Resolve URIs: determine the base URI of root and of each subschema, and
	//     resolve (in the URI sense) all identifiers and anchors against their
	//     bases. The result is a map from URIs to schemas within root.
	//  4. Resolve references: every ref in the schemas is replaced with the
	//     schema it refers to.
	//
	// The first three steps are idempotent. They may happen several times for a
	// schema that is loaded from several places.

	// Start from the zero URL so that ResolveReference can always be called.
	base := &url.URL{}
	if baseURI != "" {
		parsed, err := url.Parse(baseURI)
		if err != nil {
			return nil, fmt.Errorf("parsing base URI: %w", err)
		}
		base = parsed
	}

	// Without a loader, every remote reference is an error.
	if loader == nil {
		loader = func(*url.URL) (*Schema, error) {
			return nil, errors.New("cannot resolve remote schemas: no loader passed to Schema.Resolve")
		}
	}

	state := &resolver{
		loader: loader,
		loaded: make(map[string]*Resolved),
	}
	// TODO: before we return, throw away anything we don't need for validation.
	return state.resolve(root, base)
}
// A resolver holds the state for resolution.
// One resolver is created per call to Schema.Resolve and is shared by the
// recursive resolution of every remote schema reached from the root.
type resolver struct {
// loader fetches remote schemas. It is never nil: Schema.Resolve substitutes
// a loader that always fails when the caller passes nil.
loader Loader
// A cache of loaded and partly resolved schemas. (They may not have had their
// refs resolved.) The cache ensures that the loader will never be called more
// than once with the same URI, and that reference cycles are handled properly.
// Keys are URI strings; each schema is stored under both the URI it was
// resolved against and its own canonical URI.
loaded map[string]*Resolved
}
// resolve checks s, computes the URIs of s and its subschemas relative to
// baseURI, records the result in the resolver's cache, and then resolves all
// references reachable from s.
//
// baseURI must not carry a fragment. The returned Resolved is nil whenever the
// error is non-nil.
func (r *resolver) resolve(s *Schema, baseURI *url.URL) (*Resolved, error) {
	if baseURI.Fragment != "" {
		return nil, fmt.Errorf("base URI %s must not have a fragment", baseURI)
	}

	err := s.check()
	if err != nil {
		return nil, err
	}

	uris, err := resolveURIs(s, baseURI)
	if err != nil {
		return nil, err
	}
	resolved := &Resolved{root: s, resolvedURIs: uris}

	// Cache the schema under the URI it was loaded from and under its canonical
	// name; the two differ when the schema declares an $id.
	// This has to happen before resolveRefs runs: otherwise a cycle of
	// references would recurse without bound.
	for _, key := range []string{baseURI.String(), s.uri.String()} {
		r.loaded[key] = resolved
	}

	if err = r.resolveRefs(resolved); err != nil {
		return nil, err
	}
	return resolved, nil
}
// check verifies that the schemas reachable from root form a tree and runs
// checkLocal on each of them.
//
// A nil root and a schema that is reached twice are reported immediately.
// All problems found by checkLocal are collected and returned joined together.
func (root *Schema) check() error {
	if root == nil {
		return errors.New("nil schema")
	}

	var problems []error
	collect := func(e error) { problems = append(problems, e) }

	visited := make(map[*Schema]struct{})
	for sub := range root.all() {
		if _, again := visited[sub]; again {
			// The schema graph rooted at root must be a tree: a schema's base is
			// stored in the Schema itself, which presumes a single parent. A
			// cycle would also send Schema.all into infinite recursion.
			return fmt.Errorf("schemas rooted at %s do not form a tree (saw %s twice)", root, sub)
		}
		visited[sub] = struct{}{}
		sub.checkLocal(collect)
	}
	return errors.Join(problems...)
}
// checkLocal checks s for validity, independently of other schemas it may refer to.
// Checking a regexp requires compiling it, so checkLocal keeps the compiled
// regexps in the schema for later use; it also precomputes the set of required
// property names.
// Every problem found is passed to report.
func (s *Schema) checkLocal(report func(error)) {
	fail := func(format string, args ...any) {
		report(fmt.Errorf("jsonschema.Schema: "+format, args...))
	}

	if s == nil {
		fail("nil subschema")
		return
	}
	if err := s.basicChecks(); err != nil {
		report(err)
		return
	}

	// TODO: validate the schema's properties,
	// ideally by jsonschema-validating it against the meta-schema.

	// A few properties exist only so that Schemas can round-trip; they are not
	// validated. At present that is just $vocabulary.
	// The 2020-12 meta-schema is a special case that we can validate.
	if s.Vocabulary != nil && s.Schema != draft202012 {
		fail("cannot validate a schema with $vocabulary")
	}

	// Compile the regexps, which also checks them.
	if s.Pattern != "" {
		if compiled, err := regexp.Compile(s.Pattern); err == nil {
			s.pattern = compiled
		} else {
			fail("pattern: %w", err)
		}
	}
	if len(s.PatternProperties) > 0 {
		compiledProps := make(map[*regexp.Regexp]*Schema)
		for expr, sub := range s.PatternProperties {
			compiled, err := regexp.Compile(expr)
			if err != nil {
				fail("patternProperties[%q]: %w", expr, err)
				continue
			}
			compiledProps[compiled] = sub
		}
		s.patternProperties = compiledProps
	}

	// Keep the required property names in a set so that validating a struct
	// does not become quadratic.
	if len(s.Required) > 0 {
		required := make(map[string]bool)
		for _, name := range s.Required {
			required[name] = true
		}
		s.isRequired = required
	}
}
// resolveURIs resolves the ids and anchors in all the schemas of root, relative
// to baseURI.
// See https://json-schema.org/draft/2020-12/json-schema-core#section-8.2, section
// 8.2.1.
// TODO(jba): dynamicAnchors (§8.2.2)
//
// Every schema has a base URI and a parent base URI.
//
// The parent base URI is the base URI of the lexically enclosing schema, or for
// a root schema, the URI it was loaded from or the one supplied to [Schema.Resolve].
//
// If the schema has no $id property, the base URI of a schema is that of its parent.
// If the schema does have an $id, it must be a URI, possibly relative. The schema's
// base URI is the $id resolved (in the sense of [url.URL.ResolveReference]) against
// the parent base.
//
// As an example, consider this schema loaded from http://a.com/root.json (quotes omitted):
//
//	{
//	  allOf: [
//	    {$id: "sub1.json", minLength: 5},
//	    {$id: "http://b.com", minimum: 10},
//	    {not: {maximum: 20}}
//	  ]
//	}
//
// The base URIs are as follows. Schema locations are expressed in the JSON Pointer notation.
//
//	schema         base URI
//	root           http://a.com/root.json
//	allOf/0        http://a.com/sub1.json
//	allOf/1        http://b.com (absolute $id; doesn't matter that it's not under the loaded URI)
//	allOf/2        http://a.com/root.json (inherited from parent)
//	allOf/2/not    http://a.com/root.json (inherited from parent)
//
// resolveURIs sets the uri, base and anchors fields of the schemas it visits.
// It returns a map from URI strings (baseURI and every resolved $id) to the
// schemas they identify. It returns an error if an $id cannot be parsed, has a
// fragment, or does not resolve to an absolute URI, or if two different schemas
// declare the same anchor name within one base.
func resolveURIs(root *Schema, baseURI *url.URL) (map[string]*Schema, error) {
	resolvedURIs := map[string]*Schema{}

	// resolve processes s, whose parent base is base, and then recurses into
	// the children of s.
	var resolve func(s, base *Schema) error
	resolve = func(s, base *Schema) error {
		// ids are scoped to the root.
		if s.ID != "" {
			// A non-empty ID establishes a new base.
			idURI, err := url.Parse(s.ID)
			if err != nil {
				return err
			}
			if idURI.Fragment != "" {
				return fmt.Errorf("$id %s must not have a fragment", s.ID)
			}
			// The base URI for this schema is its $id resolved against the parent base.
			s.uri = base.uri.ResolveReference(idURI)
			if !s.uri.IsAbs() {
				// Report the parent base through the base parameter: s.base is
				// not assigned until below, so it is still nil for a schema
				// that is being resolved for the first time.
				return fmt.Errorf("$id %s does not resolve to an absolute URI (base is %s)", s.ID, base.uri)
			}
			resolvedURIs[s.uri.String()] = s
			base = s // needed for anchors
		}
		s.base = base

		// Anchors and dynamic anchors are URI fragments that are scoped to their base.
		// We treat them as keys in a map stored within the schema.
		setAnchor := func(anchor string, dynamic bool) error {
			if anchor == "" {
				return nil
			}
			if prev, ok := base.anchors[anchor]; ok {
				// The anchors map lives in the schema and survives across
				// calls, so finding the very same registration again only
				// means this schema was resolved before. Anything else is a
				// genuine duplicate.
				if prev.schema == s && prev.dynamic == dynamic {
					return nil
				}
				return fmt.Errorf("duplicate anchor %q in %s", anchor, base.uri)
			}
			if base.anchors == nil {
				base.anchors = map[string]anchorInfo{}
			}
			base.anchors[anchor] = anchorInfo{s, dynamic}
			return nil
		}

		if err := setAnchor(s.Anchor, false); err != nil {
			return err
		}
		if err := setAnchor(s.DynamicAnchor, true); err != nil {
			return err
		}

		for c := range s.children() {
			if err := resolve(c, base); err != nil {
				return err
			}
		}
		return nil
	}

	// Set the root URI to the base for now. If the root has an $id, this will change.
	root.uri = baseURI
	// The original base, even if changed, is still a valid way to refer to the root.
	resolvedURIs[baseURI.String()] = root
	if err := resolve(root, root); err != nil {
		return nil, err
	}
	return resolvedURIs, nil
}
// resolveRefs walks every schema under rs.root and records, for each $ref and
// $dynamicRef, the schema it refers to. References that do not resolve within
// rs may point into other schemas, which are loaded on demand.
// It stops at the first reference that fails to resolve and returns that error.
func (r *resolver) resolveRefs(rs *Resolved) error {
	for sub := range rs.root.all() {
		if ref := sub.Ref; ref != "" {
			// A $ref is always lexical, regardless of whether the anchor named
			// by its fragment is dynamic, so the dynamic fragment is ignored.
			target, _, err := r.resolveRef(rs, sub, ref)
			if err != nil {
				return err
			}
			sub.resolvedRef = target
		}

		dynRef := sub.DynamicRef
		if dynRef == "" {
			continue
		}
		target, anchor, err := r.resolveRef(rs, sub, dynRef)
		if err != nil {
			return err
		}
		if anchor == "" {
			// The lexically referenced schema has no dynamic anchor, so the
			// dynamic ref behaves exactly like a lexical one.
			sub.resolvedDynamicRef = target
		} else {
			// The fragment names a dynamic anchor, which can only be resolved
			// at validation time.
			sub.dynamicRefAnchor = anchor
		}
	}
	return nil
}
// resolveRef resolves the reference ref, which is either s.Ref or s.DynamicRef.
//
// ref is URI-resolved against the base URI of s. Its non-fragment part selects a
// schema document: one known to rs, one already loaded by this resolver, or one
// obtained from the loader and resolved recursively. Its fragment then selects a
// schema inside that document, either by anchor name or by JSON Pointer.
//
// dynamicFragment is the fragment when it names a dynamic anchor, and empty
// otherwise.
func (r *resolver) resolveRef(rs *Resolved, s *Schema, ref string) (_ *Schema, dynamicFragment string, err error) {
	parsed, err := url.Parse(ref)
	if err != nil {
		return nil, "", err
	}
	// URI-resolve the ref against the current base URI to get a complete URI.
	fullURI := s.base.uri.ResolveReference(parsed)

	// The non-fragment part of a ref URI refers to the base URI of some schema.
	// The same holds for dynamic refs: their non-fragment part resolves lexically.
	docURI := *fullURI
	docURI.Fragment = ""
	docKey := docURI.String()

	// Look it up locally first.
	target := rs.resolvedURIs[docKey]
	if target == nil {
		// The schema is remote. We assume that the non-fragment part of the ref
		// refers to a top-level schema document. That is, we don't support the
		// case exemplified by http://foo.com/bar.json/baz, where the document is
		// in bar.json and the reference points to a subschema within it.
		// TODO: support that case.
		if cached := r.loaded[docKey]; cached != nil {
			// It was loaded earlier.
			target = cached.root
		} else {
			// Load it now, and resolve it in turn.
			loadedSchema, err := r.loader(&docURI)
			if err != nil {
				return nil, "", fmt.Errorf("loading %s: %w", &docURI, err)
			}
			loadedResolved, err := r.resolve(loadedSchema, &docURI)
			if err != nil {
				return nil, "", err
			}
			target = loadedResolved.root
			assert(target != nil, "nil referenced schema")
		}
	}

	// The fragment is either a JSON Pointer or the name of an anchor.
	// A JSON Pointer is either the empty string or begins with a '/',
	// whereas anchors are always non-empty strings that don't contain slashes.
	frag := fullURI.Fragment
	if frag == "" || strings.HasPrefix(frag, "/") {
		pointee, err := dereferenceJSONPointer(target, frag)
		return pointee, "", err
	}

	info, ok := target.anchors[frag]
	if !ok {
		return nil, "", fmt.Errorf("no anchor %q in %s", frag, s)
	}
	if info.dynamic {
		return info.schema, frag, nil
	}
	return info.schema, "", nil
}