blob: 9d22bf1915ee39d2b39d3f1130d94bff16456533 [file] [log] [blame]
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package unify implements unification of structured values.
//
// A [Value] represents a possibly infinite set of concrete values, where a
// value is either a string ([String]), a tuple of values ([Tuple]), or a
// string-keyed map of values called a "def" ([Def]). These sets can be further
// constrained by variables ([Var]). A [Value] combined with bindings of
// variables is a [Closure].
//
// [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This
// can be thought of as intersecting the sets represented by these Closures'
// values, or as the greatest lower bound/infimum of these Closures. If no such
// Closure exists, the result of unification is "bottom", or the empty set.
//
// # Examples
//
// The regular expression "a*" is the infinite set of strings of zero or more
// "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just
// "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails
// because there are no values that satisfy both.
//
// Sums express sets directly. For example, !sum [a, b] is the set consisting of
// "a" and "b". Unifying this with !sum [b, c] results in just "b". This also
// makes it easy to demonstrate that unification isn't necessarily a single
// concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d]
// results in two concrete values: "b" and "c".
//
// The special value _ or "top" represents all possible values. Unifying _ with
// any value x results in x.
//
// Unifying composite values—tuples and defs—unifies their elements.
//
// The value [a*, aa] is an infinite set of tuples. If we unify that with the
// value [aaa, a*], the only possible value that satisfies both is [aaa, aa].
// Likewise, this is the intersection of the sets described by these two values.
//
// Defs are similar to tuples, but they are indexed by strings and don't have a
// fixed length. For example, {x: a, y: b} is a def with two fields. Any field
// not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z:
// c} results in {x: a, y: b, z: c}.
//
// Variables constrain values. For example, the value [$x, $x] represents all
// tuples whose first and second values are the same, but doesn't otherwise
// constrain that value. Thus, this set includes [a, a] as well as [[b, c, d],
// [b, c, d]], but it doesn't include [a, b].
//
// Sums are internally implemented as fresh variables that are simultaneously
// bound to all values of the sum. That is !sum [a, b] is actually $var (where
// var is some fresh name), closed under the environment $var=a | $var=b.
package unify
import (
"errors"
"fmt"
"slices"
)
// Unify computes a Closure that satisfies each input Closure. If no such
// Closure exists, it returns bottom.
func Unify(closures ...Closure) (Closure, error) {
if len(closures) == 0 {
return Closure{topValue, topEnv}, nil
}
var trace *tracer
if Debug.UnifyLog != nil || Debug.HTML != nil {
trace = &tracer{
logw: Debug.UnifyLog,
saveTree: Debug.HTML != nil,
}
}
unified := closures[0]
for _, c := range closures[1:] {
var err error
uf := newUnifier()
uf.tracer = trace
e := crossEnvs(unified.env, c.env)
unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf)
if Debug.HTML != nil {
uf.writeHTML(Debug.HTML)
}
if err != nil {
return Closure{}, err
}
}
return unified, nil
}
type unifier struct {
*tracer
}
func newUnifier() *unifier {
return &unifier{}
}
// errDomains is a sentinel error used between unify and unify1 to indicate that
// unify1 could not unify the domains of the two values.
var errDomains = errors.New("cannot unify domains")
func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) {
if swap {
// Put the values in order. This just happens to be a handy choke-point
// to do this at.
v, w = w, v
}
uf.traceUnify(v, w, e)
d, e2, err := v.unify1(w, e, false, uf)
if err == errDomains {
// Try the other order.
d, e2, err = w.unify1(v, e, true, uf)
if err == errDomains {
// Okay, we really can't unify these.
err = fmt.Errorf("cannot unify %T (%s) and %T (%s): kind mismatch", v.Domain, v.PosString(), w.Domain, w.PosString())
}
}
if err != nil {
uf.traceDone(nil, envSet{}, err)
return nil, envSet{}, err
}
res := unified(d, v, w)
uf.traceDone(res, e2, nil)
if d == nil {
// Double check that a bottom Value also has a bottom env.
if !e2.isEmpty() {
panic("bottom Value has non-bottom environment")
}
}
return res, e2, nil
}
func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
// TODO: If there's an error, attach position information to it.
vd, wd := v.Domain, w.Domain
// Bottom returns bottom, and eliminates all possible environments.
if vd == nil || wd == nil {
return nil, bottomEnv, nil
}
// Top always returns the other.
if _, ok := vd.(Top); ok {
return wd, e, nil
}
// Variables
if vd, ok := vd.(Var); ok {
return vd.unify(w, e, swap, uf)
}
// Composite values
if vd, ok := vd.(Def); ok {
if wd, ok := wd.(Def); ok {
return vd.unify(wd, e, swap, uf)
}
}
if vd, ok := vd.(Tuple); ok {
if wd, ok := wd.(Tuple); ok {
return vd.unify(wd, e, swap, uf)
}
}
// Scalar values
if vd, ok := vd.(String); ok {
if wd, ok := wd.(String); ok {
res := vd.unify(wd)
if res == nil {
e = bottomEnv
}
return res, e, nil
}
}
return nil, envSet{}, errDomains
}
func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
out := Def{fields: make(map[string]*Value)}
// Check keys of d against o.
for key, dv := range d.All() {
ov, ok := o.fields[key]
if !ok {
// ov is implicitly Top. Bypass unification.
out.fields[key] = dv
continue
}
exit := uf.enter("%s", key)
res, e2, err := dv.unify(ov, e, swap, uf)
exit.exit()
if err != nil {
return nil, envSet{}, err
} else if res.Domain == nil {
// No match.
return nil, bottomEnv, nil
}
out.fields[key] = res
e = e2
}
// Check keys of o that we didn't already check. These all implicitly match
// because we know the corresponding fields in d are all Top.
for key, dv := range o.All() {
if _, ok := d.fields[key]; !ok {
out.fields[key] = dv
}
}
return out, e, nil
}
func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
if v.repeat != nil && w.repeat != nil {
// Since we generate the content of these lazily, there's not much we
// can do but just stick them on a list to unify later.
return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil
}
// Expand any repeated tuples.
tuples := make([]Tuple, 0, 2)
if v.repeat == nil {
tuples = append(tuples, v)
} else {
v2, e2 := v.doRepeat(e, len(w.vs))
tuples = append(tuples, v2...)
e = e2
}
if w.repeat == nil {
tuples = append(tuples, w)
} else {
w2, e2 := w.doRepeat(e, len(v.vs))
tuples = append(tuples, w2...)
e = e2
}
// Now unify all of the tuples (usually this will be just 2 tuples)
out := tuples[0]
for _, t := range tuples[1:] {
if len(out.vs) != len(t.vs) {
uf.logf("tuple length mismatch")
return nil, bottomEnv, nil
}
zs := make([]*Value, len(out.vs))
for i, v1 := range out.vs {
exit := uf.enter("%d", i)
z, e2, err := v1.unify(t.vs[i], e, swap, uf)
exit.exit()
if err != nil {
return nil, envSet{}, err
} else if z.Domain == nil {
return nil, bottomEnv, nil
}
zs[i] = z
e = e2
}
out = Tuple{vs: zs}
}
return out, e, nil
}
// doRepeat creates a fixed-length tuple from a repeated tuple. The caller is
// expected to unify the returned tuples.
func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) {
res := make([]Tuple, len(v.repeat))
for i, gen := range v.repeat {
res[i].vs = make([]*Value, n)
for j := range n {
res[i].vs[j], e = gen(e)
}
}
return res, e
}
// unify intersects the domains of two [String]s. If it can prove that this
// domain is empty, it returns nil (bottom).
//
// TODO: Consider splitting literals and regexps into two domains.
func (v String) unify(w String) Domain {
// Unification is symmetric, so put them in order of string kind so we only
// have to deal with half the cases.
if v.kind > w.kind {
v, w = w, v
}
switch v.kind {
case stringRegex:
switch w.kind {
case stringRegex:
// Construct a match against all of the regexps
return String{kind: stringRegex, re: slices.Concat(v.re, w.re)}
case stringExact:
for _, re := range v.re {
if !re.MatchString(w.exact) {
return nil
}
}
return w
}
case stringExact:
if v.exact != w.exact {
return nil
}
return v
}
panic("bad string kind")
}
func concat[T any](s1, s2 []T) []T {
// Reuse s1 or s2 if possible.
if len(s1) == 0 {
return s2
}
return append(s1[:len(s1):len(s1)], s2...)
}