| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package unify |
| |
| import ( |
| "fmt" |
| "iter" |
| "reflect" |
| "strings" |
| ) |
| |
| // An envSet is an immutable set of environments, where each environment is a |
| // mapping from [ident]s to [Value]s. |
| // |
| // To keep this compact, we use an algebraic representation similar to |
| // relational algebra. The atoms are zero, unit, or a singular binding: |
| // |
| // - A singular binding {x: v} is an environment set consisting of a single |
| // environment that binds a single ident x to a single value v. |
| // |
| // - Zero (0) is the empty set. |
| // |
| // - Unit (1) is an environment set consisting of a single, empty environment |
| // (no bindings). |
| // |
| // From these, we build up more complex sets of environments using sums and |
| // cross products: |
| // |
| // - A sum, E + F, is simply the union of the two environment sets: E ∪ F |
| // |
| // - A cross product, E ⨯ F, is the Cartesian product of the two environment |
| // sets, followed by joining each pair of environments: {e ⊕ f | (e, f) ∊ E ⨯ F} |
| // |
| // The join of two environments, e ⊕ f, is an environment that contains all of |
| // the bindings in either e or f. To detect bugs, it is an error if an |
| // identifier is bound in both e and f (however, see below for what we could do |
| // differently). |
| // |
| // Environment sets form a commutative semiring and thus obey the usual |
| // commutative semiring rules: |
| // |
| // e + 0 = e |
| // e ⨯ 0 = 0 |
| // e ⨯ 1 = e |
| // e + f = f + e |
| // e ⨯ f = f ⨯ e |
| // |
| // Furthermore, environments sets are additively and multiplicatively idempotent |
| // because + and ⨯ are themselves defined in terms of sets: |
| // |
| // e + e = e |
| // e ⨯ e = e |
| // |
| // # Examples |
| // |
| // To represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two environments and |
| // sum them: |
| // |
| // ({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2}) |
| // |
| // If we add a third variable z that can be 1 or 2, independent of x and y, we |
| // get four logical environments: |
| // |
| // {x: 1, y: 1, z: 1} |
| // {x: 2, y: 2, z: 1} |
| // {x: 1, y: 1, z: 2} |
| // {x: 2, y: 2, z: 2} |
| // |
| // This could be represented as a sum of all four environments, but because z is |
| // independent, we can use a more compact representation: |
| // |
| // (({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2}) |
| // |
| // # Generalized cross product |
| // |
| // While cross-product is currently restricted to disjoint environments, we |
| // could generalize the definition of joining two environments to: |
| // |
| // {xₖ: vₖ} ⊕ {xₖ: wₖ} = {xₖ: vₖ ∩ wₖ} (where unbound idents are bound to the [Top] value, ⟙) |
| // |
| // where v ∩ w is the unification of v and w. This itself could be coarsened to |
| // |
| // v ∩ w = v if w = ⟙ |
| // = w if v = ⟙ |
| // = v if v = w |
| // = 0 otherwise |
| // |
| // We could use this rule to implement substitution. For example, E ⨯ {x: 1} |
| // narrows environment set E to only environments in which x is bound to 1. But |
| // we currently don't do this. |
| type envSet struct { |
| root *envExpr |
| } |
| |
| type envExpr struct { |
| // TODO: A tree-based data structure for this may not be ideal, since it |
| // involves a lot of walking to find things and we often have to do deep |
| // rewrites anyway for partitioning. Would some flattened array-style |
| // representation be better, possibly combined with an index of ident uses? |
| // We could even combine that with an immutable array abstraction (ala |
| // Clojure) that could enable more efficient construction operations. |
| |
| kind envExprKind |
| |
| // For envBinding |
| id *ident |
| val *Value |
| |
| // For sum or product. Len must be >= 2 and none of the elements can have |
| // the same kind as this node. |
| operands []*envExpr |
| } |
| |
| type envExprKind byte |
| |
| const ( |
| envZero envExprKind = iota |
| envUnit |
| envProduct |
| envSum |
| envBinding |
| ) |
| |
| var ( |
| // topEnv is the unit value (multiplicative identity) of a [envSet]. |
| topEnv = envSet{envExprUnit} |
| // bottomEnv is the zero value (additive identity) of a [envSet]. |
| bottomEnv = envSet{envExprZero} |
| |
| envExprZero = &envExpr{kind: envZero} |
| envExprUnit = &envExpr{kind: envUnit} |
| ) |
| |
| // bind binds id to each of vals in e. |
| // |
| // Its panics if id is already bound in e. |
| // |
| // Environments are typically initially constructed by starting with [topEnv] |
| // and calling bind one or more times. |
| func (e envSet) bind(id *ident, vals ...*Value) envSet { |
| if e.isEmpty() { |
| return bottomEnv |
| } |
| |
| // TODO: If any of vals are _, should we just drop that val? We're kind of |
| // inconsistent about whether an id missing from e means id is invalid or |
| // means id is _. |
| |
| // Check that id isn't present in e. |
| for range e.root.bindings(id) { |
| panic("id " + id.name + " already present in environment") |
| } |
| |
| // Create a sum of all the values. |
| bindings := make([]*envExpr, 0, 1) |
| for _, val := range vals { |
| bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val}) |
| } |
| |
| // Multiply it in. |
| return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))} |
| } |
| |
| func (e envSet) isEmpty() bool { |
| return e.root.kind == envZero |
| } |
| |
| // bindings yields all [envBinding] nodes in e with the given id. If id is nil, |
| // it yields all binding nodes. |
| func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] { |
| // This is just a pre-order walk and it happens this is the only thing we |
| // need a pre-order walk for. |
| return func(yield func(*envExpr) bool) { |
| var rec func(e *envExpr) bool |
| rec = func(e *envExpr) bool { |
| if e.kind == envBinding && (id == nil || e.id == id) { |
| if !yield(e) { |
| return false |
| } |
| } |
| for _, o := range e.operands { |
| if !rec(o) { |
| return false |
| } |
| } |
| return true |
| } |
| rec(e) |
| } |
| } |
| |
| // newEnvExprProduct constructs a product node from exprs, performing |
| // simplifications. It does NOT check that bindings are disjoint. |
| func newEnvExprProduct(exprs ...*envExpr) *envExpr { |
| factors := make([]*envExpr, 0, 2) |
| for _, expr := range exprs { |
| switch expr.kind { |
| case envZero: |
| return envExprZero |
| case envUnit: |
| // No effect on product |
| case envProduct: |
| factors = append(factors, expr.operands...) |
| default: |
| factors = append(factors, expr) |
| } |
| } |
| |
| if len(factors) == 0 { |
| return envExprUnit |
| } else if len(factors) == 1 { |
| return factors[0] |
| } |
| return &envExpr{kind: envProduct, operands: factors} |
| } |
| |
| // newEnvExprSum constructs a sum node from exprs, performing simplifications. |
| func newEnvExprSum(exprs ...*envExpr) *envExpr { |
| // TODO: If all of envs are products (or bindings), factor any common terms. |
| // E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding |
| // terms, but harder to do for more general terms. |
| |
| var have smallSet[*envExpr] |
| terms := make([]*envExpr, 0, 2) |
| for _, expr := range exprs { |
| switch expr.kind { |
| case envZero: |
| // No effect on sum |
| case envSum: |
| for _, expr1 := range expr.operands { |
| if have.Add(expr1) { |
| terms = append(terms, expr1) |
| } |
| } |
| default: |
| if have.Add(expr) { |
| terms = append(terms, expr) |
| } |
| } |
| } |
| |
| if len(terms) == 0 { |
| return envExprZero |
| } else if len(terms) == 1 { |
| return terms[0] |
| } |
| return &envExpr{kind: envSum, operands: terms} |
| } |
| |
| func crossEnvs(env1, env2 envSet) envSet { |
| // Confirm that envs have disjoint idents. |
| var ids1 smallSet[*ident] |
| for e := range env1.root.bindings(nil) { |
| ids1.Add(e.id) |
| } |
| for e := range env2.root.bindings(nil) { |
| if ids1.Has(e.id) { |
| panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name)) |
| } |
| } |
| |
| return envSet{newEnvExprProduct(env1.root, env2.root)} |
| } |
| |
| func unionEnvs(envs ...envSet) envSet { |
| exprs := make([]*envExpr, len(envs)) |
| for i := range envs { |
| exprs[i] = envs[i].root |
| } |
| return envSet{newEnvExprSum(exprs...)} |
| } |
| |
| // envPartition is a subset of an env where id is bound to value in all |
| // deterministic environments. |
| type envPartition struct { |
| id *ident |
| value *Value |
| env envSet |
| } |
| |
| // partitionBy splits e by distinct bindings of id and removes id from each |
| // partition. |
| // |
| // If there are environments in e where id is not bound, they will not be |
| // reflected in any partition. |
| // |
| // It panics if e is bottom, since attempting to partition an empty environment |
| // set almost certainly indicates a bug. |
| func (e envSet) partitionBy(id *ident) []envPartition { |
| if e.isEmpty() { |
| // We could return zero partitions, but getting here at all almost |
| // certainly indicates a bug. |
| panic("cannot partition empty environment set") |
| } |
| |
| // Emit a partition for each value of id. |
| var seen smallSet[*Value] |
| var parts []envPartition |
| for n := range e.root.bindings(id) { |
| if !seen.Add(n.val) { |
| // Already emitted a partition for this value. |
| continue |
| } |
| |
| parts = append(parts, envPartition{ |
| id: id, |
| value: n.val, |
| env: envSet{e.root.substitute(id, n.val)}, |
| }) |
| } |
| |
| return parts |
| } |
| |
| // substitute replaces bindings of id to val with 1 and bindings of id to any |
| // other value with 0 and simplifies the result. |
| func (e *envExpr) substitute(id *ident, val *Value) *envExpr { |
| switch e.kind { |
| default: |
| panic("bad kind") |
| |
| case envZero, envUnit: |
| return e |
| |
| case envBinding: |
| if e.id != id { |
| return e |
| } else if e.val != val { |
| return envExprZero |
| } else { |
| return envExprUnit |
| } |
| |
| case envProduct, envSum: |
| // Substitute each operand. Sometimes, this won't change anything, so we |
| // build the new operands list lazily. |
| var nOperands []*envExpr |
| for i, op := range e.operands { |
| nOp := op.substitute(id, val) |
| if nOperands == nil && op != nOp { |
| // Operand diverged; initialize nOperands. |
| nOperands = make([]*envExpr, 0, len(e.operands)) |
| nOperands = append(nOperands, e.operands[:i]...) |
| } |
| if nOperands != nil { |
| nOperands = append(nOperands, nOp) |
| } |
| } |
| if nOperands == nil { |
| // Nothing changed. |
| return e |
| } |
| if e.kind == envProduct { |
| return newEnvExprProduct(nOperands...) |
| } else { |
| return newEnvExprSum(nOperands...) |
| } |
| } |
| } |
| |
| // A smallSet is a set optimized for stack allocation when small. |
| type smallSet[T comparable] struct { |
| array [32]T |
| n int |
| |
| m map[T]struct{} |
| } |
| |
| // Has returns whether val is in set. |
| func (s *smallSet[T]) Has(val T) bool { |
| arr := s.array[:s.n] |
| for i := range arr { |
| if arr[i] == val { |
| return true |
| } |
| } |
| _, ok := s.m[val] |
| return ok |
| } |
| |
| // Add adds val to the set and returns true if it was added (not already |
| // present). |
| func (s *smallSet[T]) Add(val T) bool { |
| // Test for presence. |
| if s.Has(val) { |
| return false |
| } |
| |
| // Add it |
| if s.n < len(s.array) { |
| s.array[s.n] = val |
| s.n++ |
| } else { |
| if s.m == nil { |
| s.m = make(map[T]struct{}) |
| } |
| s.m[val] = struct{}{} |
| } |
| return true |
| } |
| |
| type ident struct { |
| _ [0]func() // Not comparable (only compare *ident) |
| name string |
| } |
| |
| type Var struct { |
| id *ident |
| } |
| |
| func (d Var) Exact() bool { |
| // These can't appear in concrete Values. |
| panic("Exact called on non-concrete Value") |
| } |
| |
| func (d Var) WhyNotExact() string { |
| // These can't appear in concrete Values. |
| return "WhyNotExact called on non-concrete Value" |
| } |
| |
| func (d Var) decode(rv reflect.Value) error { |
| return &inexactError{"var", rv.Type().String()} |
| } |
| |
| func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) { |
| // TODO: Vars from !sums in the input can have a huge number of values. |
| // Unifying these could be way more efficient with some indexes over any |
| // exact values we can pull out, like Def fields that are exact Strings. |
| // Maybe we try to produce an array of yes/no/maybe matches and then we only |
| // have to do deeper evaluation of the maybes. We could probably cache this |
| // on an envTerm. It may also help to special-case Var/Var unification to |
| // pick which one to index versus enumerate. |
| |
| if vd, ok := w.Domain.(Var); ok && d.id == vd.id { |
| // Unifying $x with $x results in $x. If we descend into this we'll have |
| // problems because we strip $x out of the environment to keep ourselves |
| // honest and then can't find it on the other side. |
| // |
| // TODO: I'm not positive this is the right fix. |
| return vd, e, nil |
| } |
| |
| // We need to unify w with the value of d in each possible environment. We |
| // can save some work by grouping environments by the value of d, since |
| // there will be a lot of redundancy here. |
| var nEnvs []envSet |
| envParts := e.partitionBy(d.id) |
| for i, envPart := range envParts { |
| exit := uf.enterVar(d.id, i) |
| // Each branch logically gets its own copy of the initial environment |
| // (narrowed down to just this binding of the variable), and each branch |
| // may result in different changes to that starting environment. |
| res, e2, err := w.unify(envPart.value, envPart.env, swap, uf) |
| exit.exit() |
| if err != nil { |
| return nil, envSet{}, err |
| } |
| if res.Domain == nil { |
| // This branch entirely failed to unify, so it's gone. |
| continue |
| } |
| nEnv := e2.bind(d.id, res) |
| nEnvs = append(nEnvs, nEnv) |
| } |
| |
| if len(nEnvs) == 0 { |
| // All branches failed |
| return nil, bottomEnv, nil |
| } |
| |
| // The effect of this is entirely captured in the environment. We can return |
| // back the same Bind node. |
| return d, unionEnvs(nEnvs...), nil |
| } |
| |
| // An identPrinter maps [ident]s to unique string names. |
| type identPrinter struct { |
| ids map[*ident]string |
| idGen map[string]int |
| } |
| |
| func (p *identPrinter) unique(id *ident) string { |
| if p.ids == nil { |
| p.ids = make(map[*ident]string) |
| p.idGen = make(map[string]int) |
| } |
| |
| name, ok := p.ids[id] |
| if !ok { |
| gen := p.idGen[id.name] |
| p.idGen[id.name]++ |
| if gen == 0 { |
| name = id.name |
| } else { |
| name = fmt.Sprintf("%s#%d", id.name, gen) |
| } |
| p.ids[id] = name |
| } |
| |
| return name |
| } |
| |
| func (p *identPrinter) slice(ids []*ident) string { |
| var strs []string |
| for _, id := range ids { |
| strs = append(strs, p.unique(id)) |
| } |
| return fmt.Sprintf("[%s]", strings.Join(strs, ", ")) |
| } |