Source file src/simd/_gen/unify/env.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package unify
     6  
     7  import (
     8  	"fmt"
     9  	"iter"
    10  	"reflect"
    11  	"strings"
    12  )
    13  
    14  // An envSet is an immutable set of environments, where each environment is a
    15  // mapping from [ident]s to [Value]s.
    16  //
    17  // To keep this compact, we use an algebraic representation similar to
    18  // relational algebra. The atoms are zero, unit, or a singular binding:
    19  //
    20  // - A singular binding {x: v} is an environment set consisting of a single
    21  // environment that binds a single ident x to a single value v.
    22  //
    23  // - Zero (0) is the empty set.
    24  //
    25  // - Unit (1) is an environment set consisting of a single, empty environment
    26  // (no bindings).
    27  //
    28  // From these, we build up more complex sets of environments using sums and
    29  // cross products:
    30  //
    31  // - A sum, E + F, is simply the union of the two environment sets: E ∪ F
    32  //
    33  // - A cross product, E ⨯ F, is the Cartesian product of the two environment
    34  // sets, followed by joining each pair of environments: {e ⊕ f | (e, f) ∊ E ⨯ F}
    35  //
    36  // The join of two environments, e ⊕ f, is an environment that contains all of
    37  // the bindings in either e or f. To detect bugs, it is an error if an
    38  // identifier is bound in both e and f (however, see below for what we could do
    39  // differently).
    40  //
    41  // Environment sets form a commutative semiring and thus obey the usual
    42  // commutative semiring rules:
    43  //
    44  //	e + 0 = e
    45  //	e ⨯ 0 = 0
    46  //	e ⨯ 1 = e
    47  //	e + f = f + e
    48  //	e ⨯ f = f ⨯ e
    49  //
    50  // Furthermore, environments sets are additively and multiplicatively idempotent
    51  // because + and ⨯ are themselves defined in terms of sets:
    52  //
    53  //	e + e = e
    54  //	e ⨯ e = e
    55  //
    56  // # Examples
    57  //
    58  // To represent {{x: 1, y: 1}, {x: 2, y: 2}}, we build the two environments and
    59  // sum them:
    60  //
    61  //	({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})
    62  //
    63  // If we add a third variable z that can be 1 or 2, independent of x and y, we
    64  // get four logical environments:
    65  //
    66  //	{x: 1, y: 1, z: 1}
    67  //	{x: 2, y: 2, z: 1}
    68  //	{x: 1, y: 1, z: 2}
    69  //	{x: 2, y: 2, z: 2}
    70  //
    71  // This could be represented as a sum of all four environments, but because z is
    72  // independent, we can use a more compact representation:
    73  //
    74  //	(({x: 1} ⨯ {y: 1}) + ({x: 2} ⨯ {y: 2})) ⨯ ({z: 1} + {z: 2})
    75  //
    76  // # Generalized cross product
    77  //
    78  // While cross-product is currently restricted to disjoint environments, we
    79  // could generalize the definition of joining two environments to:
    80  //
    81  //	{xₖ: vₖ} ⊕ {xₖ: wₖ} = {xₖ: vₖ ∩ wₖ} (where unbound idents are bound to the [Top] value, ⟙)
    82  //
    83  // where v ∩ w is the unification of v and w. This itself could be coarsened to
    84  //
    85  //	v ∩ w = v if w = ⟙
    86  //	      = w if v = ⟙
    87  //	      = v if v = w
    88  //	      = 0 otherwise
    89  //
    90  // We could use this rule to implement substitution. For example, E ⨯ {x: 1}
    91  // narrows environment set E to only environments in which x is bound to 1. But
    92  // we currently don't do this.
    93  type envSet struct {
    94  	root *envExpr
    95  }
    96  
    97  type envExpr struct {
    98  	// TODO: A tree-based data structure for this may not be ideal, since it
    99  	// involves a lot of walking to find things and we often have to do deep
   100  	// rewrites anyway for partitioning. Would some flattened array-style
   101  	// representation be better, possibly combined with an index of ident uses?
   102  	// We could even combine that with an immutable array abstraction (ala
   103  	// Clojure) that could enable more efficient construction operations.
   104  
   105  	kind envExprKind
   106  
   107  	// For envBinding
   108  	id  *ident
   109  	val *Value
   110  
   111  	// For sum or product. Len must be >= 2 and none of the elements can have
   112  	// the same kind as this node.
   113  	operands []*envExpr
   114  }
   115  
   116  type envExprKind byte
   117  
   118  const (
   119  	envZero envExprKind = iota
   120  	envUnit
   121  	envProduct
   122  	envSum
   123  	envBinding
   124  )
   125  
   126  var (
   127  	// topEnv is the unit value (multiplicative identity) of a [envSet].
   128  	topEnv = envSet{envExprUnit}
   129  	// bottomEnv is the zero value (additive identity) of a [envSet].
   130  	bottomEnv = envSet{envExprZero}
   131  
   132  	envExprZero = &envExpr{kind: envZero}
   133  	envExprUnit = &envExpr{kind: envUnit}
   134  )
   135  
   136  // bind binds id to each of vals in e.
   137  //
   138  // Its panics if id is already bound in e.
   139  //
   140  // Environments are typically initially constructed by starting with [topEnv]
   141  // and calling bind one or more times.
   142  func (e envSet) bind(id *ident, vals ...*Value) envSet {
   143  	if e.isEmpty() {
   144  		return bottomEnv
   145  	}
   146  
   147  	// TODO: If any of vals are _, should we just drop that val? We're kind of
   148  	// inconsistent about whether an id missing from e means id is invalid or
   149  	// means id is _.
   150  
   151  	// Check that id isn't present in e.
   152  	for range e.root.bindings(id) {
   153  		panic("id " + id.name + " already present in environment")
   154  	}
   155  
   156  	// Create a sum of all the values.
   157  	bindings := make([]*envExpr, 0, 1)
   158  	for _, val := range vals {
   159  		bindings = append(bindings, &envExpr{kind: envBinding, id: id, val: val})
   160  	}
   161  
   162  	// Multiply it in.
   163  	return envSet{newEnvExprProduct(e.root, newEnvExprSum(bindings...))}
   164  }
   165  
   166  func (e envSet) isEmpty() bool {
   167  	return e.root.kind == envZero
   168  }
   169  
   170  // bindings yields all [envBinding] nodes in e with the given id. If id is nil,
   171  // it yields all binding nodes.
   172  func (e *envExpr) bindings(id *ident) iter.Seq[*envExpr] {
   173  	// This is just a pre-order walk and it happens this is the only thing we
   174  	// need a pre-order walk for.
   175  	return func(yield func(*envExpr) bool) {
   176  		var rec func(e *envExpr) bool
   177  		rec = func(e *envExpr) bool {
   178  			if e.kind == envBinding && (id == nil || e.id == id) {
   179  				if !yield(e) {
   180  					return false
   181  				}
   182  			}
   183  			for _, o := range e.operands {
   184  				if !rec(o) {
   185  					return false
   186  				}
   187  			}
   188  			return true
   189  		}
   190  		rec(e)
   191  	}
   192  }
   193  
   194  // newEnvExprProduct constructs a product node from exprs, performing
   195  // simplifications. It does NOT check that bindings are disjoint.
   196  func newEnvExprProduct(exprs ...*envExpr) *envExpr {
   197  	factors := make([]*envExpr, 0, 2)
   198  	for _, expr := range exprs {
   199  		switch expr.kind {
   200  		case envZero:
   201  			return envExprZero
   202  		case envUnit:
   203  			// No effect on product
   204  		case envProduct:
   205  			factors = append(factors, expr.operands...)
   206  		default:
   207  			factors = append(factors, expr)
   208  		}
   209  	}
   210  
   211  	if len(factors) == 0 {
   212  		return envExprUnit
   213  	} else if len(factors) == 1 {
   214  		return factors[0]
   215  	}
   216  	return &envExpr{kind: envProduct, operands: factors}
   217  }
   218  
   219  // newEnvExprSum constructs a sum node from exprs, performing simplifications.
   220  func newEnvExprSum(exprs ...*envExpr) *envExpr {
   221  	// TODO: If all of envs are products (or bindings), factor any common terms.
   222  	// E.g., x * y + x * z ==> x * (y + z). This is easy to do for binding
   223  	// terms, but harder to do for more general terms.
   224  
   225  	var have smallSet[*envExpr]
   226  	terms := make([]*envExpr, 0, 2)
   227  	for _, expr := range exprs {
   228  		switch expr.kind {
   229  		case envZero:
   230  			// No effect on sum
   231  		case envSum:
   232  			for _, expr1 := range expr.operands {
   233  				if have.Add(expr1) {
   234  					terms = append(terms, expr1)
   235  				}
   236  			}
   237  		default:
   238  			if have.Add(expr) {
   239  				terms = append(terms, expr)
   240  			}
   241  		}
   242  	}
   243  
   244  	if len(terms) == 0 {
   245  		return envExprZero
   246  	} else if len(terms) == 1 {
   247  		return terms[0]
   248  	}
   249  	return &envExpr{kind: envSum, operands: terms}
   250  }
   251  
   252  func crossEnvs(env1, env2 envSet) envSet {
   253  	// Confirm that envs have disjoint idents.
   254  	var ids1 smallSet[*ident]
   255  	for e := range env1.root.bindings(nil) {
   256  		ids1.Add(e.id)
   257  	}
   258  	for e := range env2.root.bindings(nil) {
   259  		if ids1.Has(e.id) {
   260  			panic(fmt.Sprintf("%s bound on both sides of cross-product", e.id.name))
   261  		}
   262  	}
   263  
   264  	return envSet{newEnvExprProduct(env1.root, env2.root)}
   265  }
   266  
   267  func unionEnvs(envs ...envSet) envSet {
   268  	exprs := make([]*envExpr, len(envs))
   269  	for i := range envs {
   270  		exprs[i] = envs[i].root
   271  	}
   272  	return envSet{newEnvExprSum(exprs...)}
   273  }
   274  
   275  // envPartition is a subset of an env where id is bound to value in all
   276  // deterministic environments.
   277  type envPartition struct {
   278  	id    *ident
   279  	value *Value
   280  	env   envSet
   281  }
   282  
   283  // partitionBy splits e by distinct bindings of id and removes id from each
   284  // partition.
   285  //
   286  // If there are environments in e where id is not bound, they will not be
   287  // reflected in any partition.
   288  //
   289  // It panics if e is bottom, since attempting to partition an empty environment
   290  // set almost certainly indicates a bug.
   291  func (e envSet) partitionBy(id *ident) []envPartition {
   292  	if e.isEmpty() {
   293  		// We could return zero partitions, but getting here at all almost
   294  		// certainly indicates a bug.
   295  		panic("cannot partition empty environment set")
   296  	}
   297  
   298  	// Emit a partition for each value of id.
   299  	var seen smallSet[*Value]
   300  	var parts []envPartition
   301  	for n := range e.root.bindings(id) {
   302  		if !seen.Add(n.val) {
   303  			// Already emitted a partition for this value.
   304  			continue
   305  		}
   306  
   307  		parts = append(parts, envPartition{
   308  			id:    id,
   309  			value: n.val,
   310  			env:   envSet{e.root.substitute(id, n.val)},
   311  		})
   312  	}
   313  
   314  	return parts
   315  }
   316  
   317  // substitute replaces bindings of id to val with 1 and bindings of id to any
   318  // other value with 0 and simplifies the result.
   319  func (e *envExpr) substitute(id *ident, val *Value) *envExpr {
   320  	switch e.kind {
   321  	default:
   322  		panic("bad kind")
   323  
   324  	case envZero, envUnit:
   325  		return e
   326  
   327  	case envBinding:
   328  		if e.id != id {
   329  			return e
   330  		} else if e.val != val {
   331  			return envExprZero
   332  		} else {
   333  			return envExprUnit
   334  		}
   335  
   336  	case envProduct, envSum:
   337  		// Substitute each operand. Sometimes, this won't change anything, so we
   338  		// build the new operands list lazily.
   339  		var nOperands []*envExpr
   340  		for i, op := range e.operands {
   341  			nOp := op.substitute(id, val)
   342  			if nOperands == nil && op != nOp {
   343  				// Operand diverged; initialize nOperands.
   344  				nOperands = make([]*envExpr, 0, len(e.operands))
   345  				nOperands = append(nOperands, e.operands[:i]...)
   346  			}
   347  			if nOperands != nil {
   348  				nOperands = append(nOperands, nOp)
   349  			}
   350  		}
   351  		if nOperands == nil {
   352  			// Nothing changed.
   353  			return e
   354  		}
   355  		if e.kind == envProduct {
   356  			return newEnvExprProduct(nOperands...)
   357  		} else {
   358  			return newEnvExprSum(nOperands...)
   359  		}
   360  	}
   361  }
   362  
   363  // A smallSet is a set optimized for stack allocation when small.
   364  type smallSet[T comparable] struct {
   365  	array [32]T
   366  	n     int
   367  
   368  	m map[T]struct{}
   369  }
   370  
   371  // Has returns whether val is in set.
   372  func (s *smallSet[T]) Has(val T) bool {
   373  	arr := s.array[:s.n]
   374  	for i := range arr {
   375  		if arr[i] == val {
   376  			return true
   377  		}
   378  	}
   379  	_, ok := s.m[val]
   380  	return ok
   381  }
   382  
   383  // Add adds val to the set and returns true if it was added (not already
   384  // present).
   385  func (s *smallSet[T]) Add(val T) bool {
   386  	// Test for presence.
   387  	if s.Has(val) {
   388  		return false
   389  	}
   390  
   391  	// Add it
   392  	if s.n < len(s.array) {
   393  		s.array[s.n] = val
   394  		s.n++
   395  	} else {
   396  		if s.m == nil {
   397  			s.m = make(map[T]struct{})
   398  		}
   399  		s.m[val] = struct{}{}
   400  	}
   401  	return true
   402  }
   403  
   404  type ident struct {
   405  	_    [0]func() // Not comparable (only compare *ident)
   406  	name string
   407  }
   408  
   409  type Var struct {
   410  	id *ident
   411  }
   412  
   413  func (d Var) Exact() bool {
   414  	// These can't appear in concrete Values.
   415  	panic("Exact called on non-concrete Value")
   416  }
   417  
   418  func (d Var) WhyNotExact() string {
   419  	// These can't appear in concrete Values.
   420  	return "WhyNotExact called on non-concrete Value"
   421  }
   422  
   423  func (d Var) decode(rv reflect.Value) error {
   424  	return &inexactError{"var", rv.Type().String()}
   425  }
   426  
   427  func (d Var) unify(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
   428  	// TODO: Vars from !sums in the input can have a huge number of values.
   429  	// Unifying these could be way more efficient with some indexes over any
   430  	// exact values we can pull out, like Def fields that are exact Strings.
   431  	// Maybe we try to produce an array of yes/no/maybe matches and then we only
   432  	// have to do deeper evaluation of the maybes. We could probably cache this
   433  	// on an envTerm. It may also help to special-case Var/Var unification to
   434  	// pick which one to index versus enumerate.
   435  
   436  	if vd, ok := w.Domain.(Var); ok && d.id == vd.id {
   437  		// Unifying $x with $x results in $x. If we descend into this we'll have
   438  		// problems because we strip $x out of the environment to keep ourselves
   439  		// honest and then can't find it on the other side.
   440  		//
   441  		// TODO: I'm not positive this is the right fix.
   442  		return vd, e, nil
   443  	}
   444  
   445  	// We need to unify w with the value of d in each possible environment. We
   446  	// can save some work by grouping environments by the value of d, since
   447  	// there will be a lot of redundancy here.
   448  	var nEnvs []envSet
   449  	envParts := e.partitionBy(d.id)
   450  	for i, envPart := range envParts {
   451  		exit := uf.enterVar(d.id, i)
   452  		// Each branch logically gets its own copy of the initial environment
   453  		// (narrowed down to just this binding of the variable), and each branch
   454  		// may result in different changes to that starting environment.
   455  		res, e2, err := w.unify(envPart.value, envPart.env, swap, uf)
   456  		exit.exit()
   457  		if err != nil {
   458  			return nil, envSet{}, err
   459  		}
   460  		if res.Domain == nil {
   461  			// This branch entirely failed to unify, so it's gone.
   462  			continue
   463  		}
   464  		nEnv := e2.bind(d.id, res)
   465  		nEnvs = append(nEnvs, nEnv)
   466  	}
   467  
   468  	if len(nEnvs) == 0 {
   469  		// All branches failed
   470  		return nil, bottomEnv, nil
   471  	}
   472  
   473  	// The effect of this is entirely captured in the environment. We can return
   474  	// back the same Bind node.
   475  	return d, unionEnvs(nEnvs...), nil
   476  }
   477  
   478  // An identPrinter maps [ident]s to unique string names.
   479  type identPrinter struct {
   480  	ids   map[*ident]string
   481  	idGen map[string]int
   482  }
   483  
   484  func (p *identPrinter) unique(id *ident) string {
   485  	if p.ids == nil {
   486  		p.ids = make(map[*ident]string)
   487  		p.idGen = make(map[string]int)
   488  	}
   489  
   490  	name, ok := p.ids[id]
   491  	if !ok {
   492  		gen := p.idGen[id.name]
   493  		p.idGen[id.name]++
   494  		if gen == 0 {
   495  			name = id.name
   496  		} else {
   497  			name = fmt.Sprintf("%s#%d", id.name, gen)
   498  		}
   499  		p.ids[id] = name
   500  	}
   501  
   502  	return name
   503  }
   504  
   505  func (p *identPrinter) slice(ids []*ident) string {
   506  	var strs []string
   507  	for _, id := range ids {
   508  		strs = append(strs, p.unique(id))
   509  	}
   510  	return fmt.Sprintf("[%s]", strings.Join(strs, ", "))
   511  }
   512  

View as plain text