Source file src/simd/_gen/unify/unify.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package unify implements unification of structured values.
     6  //
     7  // A [Value] represents a possibly infinite set of concrete values, where a
     8  // value is either a string ([String]), a tuple of values ([Tuple]), or a
     9  // string-keyed map of values called a "def" ([Def]). These sets can be further
    10  // constrained by variables ([Var]). A [Value] combined with bindings of
    11  // variables is a [Closure].
    12  //
    13  // [Unify] finds a [Closure] that satisfies two or more other [Closure]s. This
    14  // can be thought of as intersecting the sets represented by these Closures'
    15  // values, or as the greatest lower bound/infimum of these Closures. If no such
    16  // Closure exists, the result of unification is "bottom", or the empty set.
    17  //
    18  // # Examples
    19  //
    20  // The regular expression "a*" is the infinite set of strings of zero or more
    21  // "a"s. "a*" can be unified with "a" or "aa" or "aaa", and the result is just
    22  // "a", "aa", or "aaa", respectively. However, unifying "a*" with "b" fails
    23  // because there are no values that satisfy both.
    24  //
    25  // Sums express sets directly. For example, !sum [a, b] is the set consisting of
    26  // "a" and "b". Unifying this with !sum [b, c] results in just "b". This also
    27  // makes it easy to demonstrate that unification isn't necessarily a single
    28  // concrete value. For example, unifying !sum [a, b, c] with !sum [b, c, d]
    29  // results in two concrete values: "b" and "c".
    30  //
    31  // The special value _ or "top" represents all possible values. Unifying _ with
    32  // any value x results in x.
    33  //
    34  // Unifying composite values—tuples and defs—unifies their elements.
    35  //
    36  // The value [a*, aa] is an infinite set of tuples. If we unify that with the
    37  // value [aaa, a*], the only possible value that satisfies both is [aaa, aa].
    38  // Likewise, this is the intersection of the sets described by these two values.
    39  //
    40  // Defs are similar to tuples, but they are indexed by strings and don't have a
    41  // fixed length. For example, {x: a, y: b} is a def with two fields. Any field
    42  // not mentioned in a def is implicitly top. Thus, unifying this with {y: b, z:
    43  // c} results in {x: a, y: b, z: c}.
    44  //
    45  // Variables constrain values. For example, the value [$x, $x] represents all
    46  // tuples whose first and second values are the same, but doesn't otherwise
    47  // constrain that value. Thus, this set includes [a, a] as well as [[b, c, d],
    48  // [b, c, d]], but it doesn't include [a, b].
    49  //
    50  // Sums are internally implemented as fresh variables that are simultaneously
    51  // bound to all values of the sum. That is !sum [a, b] is actually $var (where
    52  // var is some fresh name), closed under the environment $var=a | $var=b.
    53  package unify
    54  
    55  import (
    56  	"errors"
    57  	"fmt"
    58  	"slices"
    59  )
    60  
    61  // Unify computes a Closure that satisfies each input Closure. If no such
    62  // Closure exists, it returns bottom.
    63  func Unify(closures ...Closure) (Closure, error) {
    64  	if len(closures) == 0 {
    65  		return Closure{topValue, topEnv}, nil
    66  	}
    67  
    68  	var trace *tracer
    69  	if Debug.UnifyLog != nil || Debug.HTML != nil {
    70  		trace = &tracer{
    71  			logw:     Debug.UnifyLog,
    72  			saveTree: Debug.HTML != nil,
    73  		}
    74  	}
    75  
    76  	unified := closures[0]
    77  	for _, c := range closures[1:] {
    78  		var err error
    79  		uf := newUnifier()
    80  		uf.tracer = trace
    81  		e := crossEnvs(unified.env, c.env)
    82  		unified.val, unified.env, err = unified.val.unify(c.val, e, false, uf)
    83  		if Debug.HTML != nil {
    84  			uf.writeHTML(Debug.HTML)
    85  		}
    86  		if err != nil {
    87  			return Closure{}, err
    88  		}
    89  	}
    90  
    91  	return unified, nil
    92  }
    93  
    94  type unifier struct {
    95  	*tracer
    96  }
    97  
    98  func newUnifier() *unifier {
    99  	return &unifier{}
   100  }
   101  
   102  // errDomains is a sentinel error used between unify and unify1 to indicate that
   103  // unify1 could not unify the domains of the two values.
   104  var errDomains = errors.New("cannot unify domains")
   105  
   106  func (v *Value) unify(w *Value, e envSet, swap bool, uf *unifier) (*Value, envSet, error) {
   107  	if swap {
   108  		// Put the values in order. This just happens to be a handy choke-point
   109  		// to do this at.
   110  		v, w = w, v
   111  	}
   112  
   113  	uf.traceUnify(v, w, e)
   114  
   115  	d, e2, err := v.unify1(w, e, false, uf)
   116  	if err == errDomains {
   117  		// Try the other order.
   118  		d, e2, err = w.unify1(v, e, true, uf)
   119  		if err == errDomains {
   120  			// Okay, we really can't unify these.
   121  			err = fmt.Errorf("cannot unify %T (%s) and %T (%s): kind mismatch", v.Domain, v.PosString(), w.Domain, w.PosString())
   122  		}
   123  	}
   124  	if err != nil {
   125  		uf.traceDone(nil, envSet{}, err)
   126  		return nil, envSet{}, err
   127  	}
   128  	res := unified(d, v, w)
   129  	uf.traceDone(res, e2, nil)
   130  	if d == nil {
   131  		// Double check that a bottom Value also has a bottom env.
   132  		if !e2.isEmpty() {
   133  			panic("bottom Value has non-bottom environment")
   134  		}
   135  	}
   136  
   137  	return res, e2, nil
   138  }
   139  
   140  func (v *Value) unify1(w *Value, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
   141  	// TODO: If there's an error, attach position information to it.
   142  
   143  	vd, wd := v.Domain, w.Domain
   144  
   145  	// Bottom returns bottom, and eliminates all possible environments.
   146  	if vd == nil || wd == nil {
   147  		return nil, bottomEnv, nil
   148  	}
   149  
   150  	// Top always returns the other.
   151  	if _, ok := vd.(Top); ok {
   152  		return wd, e, nil
   153  	}
   154  
   155  	// Variables
   156  	if vd, ok := vd.(Var); ok {
   157  		return vd.unify(w, e, swap, uf)
   158  	}
   159  
   160  	// Composite values
   161  	if vd, ok := vd.(Def); ok {
   162  		if wd, ok := wd.(Def); ok {
   163  			return vd.unify(wd, e, swap, uf)
   164  		}
   165  	}
   166  	if vd, ok := vd.(Tuple); ok {
   167  		if wd, ok := wd.(Tuple); ok {
   168  			return vd.unify(wd, e, swap, uf)
   169  		}
   170  	}
   171  
   172  	// Scalar values
   173  	if vd, ok := vd.(String); ok {
   174  		if wd, ok := wd.(String); ok {
   175  			res := vd.unify(wd)
   176  			if res == nil {
   177  				e = bottomEnv
   178  			}
   179  			return res, e, nil
   180  		}
   181  	}
   182  
   183  	return nil, envSet{}, errDomains
   184  }
   185  
   186  func (d Def) unify(o Def, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
   187  	out := Def{fields: make(map[string]*Value)}
   188  
   189  	// Check keys of d against o.
   190  	for key, dv := range d.All() {
   191  		ov, ok := o.fields[key]
   192  		if !ok {
   193  			// ov is implicitly Top. Bypass unification.
   194  			out.fields[key] = dv
   195  			continue
   196  		}
   197  		exit := uf.enter("%s", key)
   198  		res, e2, err := dv.unify(ov, e, swap, uf)
   199  		exit.exit()
   200  		if err != nil {
   201  			return nil, envSet{}, err
   202  		} else if res.Domain == nil {
   203  			// No match.
   204  			return nil, bottomEnv, nil
   205  		}
   206  		out.fields[key] = res
   207  		e = e2
   208  	}
   209  	// Check keys of o that we didn't already check. These all implicitly match
   210  	// because we know the corresponding fields in d are all Top.
   211  	for key, dv := range o.All() {
   212  		if _, ok := d.fields[key]; !ok {
   213  			out.fields[key] = dv
   214  		}
   215  	}
   216  	return out, e, nil
   217  }
   218  
   219  func (v Tuple) unify(w Tuple, e envSet, swap bool, uf *unifier) (Domain, envSet, error) {
   220  	if v.repeat != nil && w.repeat != nil {
   221  		// Since we generate the content of these lazily, there's not much we
   222  		// can do but just stick them on a list to unify later.
   223  		return Tuple{repeat: concat(v.repeat, w.repeat)}, e, nil
   224  	}
   225  
   226  	// Expand any repeated tuples.
   227  	tuples := make([]Tuple, 0, 2)
   228  	if v.repeat == nil {
   229  		tuples = append(tuples, v)
   230  	} else {
   231  		v2, e2 := v.doRepeat(e, len(w.vs))
   232  		tuples = append(tuples, v2...)
   233  		e = e2
   234  	}
   235  	if w.repeat == nil {
   236  		tuples = append(tuples, w)
   237  	} else {
   238  		w2, e2 := w.doRepeat(e, len(v.vs))
   239  		tuples = append(tuples, w2...)
   240  		e = e2
   241  	}
   242  
   243  	// Now unify all of the tuples (usually this will be just 2 tuples)
   244  	out := tuples[0]
   245  	for _, t := range tuples[1:] {
   246  		if len(out.vs) != len(t.vs) {
   247  			uf.logf("tuple length mismatch")
   248  			return nil, bottomEnv, nil
   249  		}
   250  		zs := make([]*Value, len(out.vs))
   251  		for i, v1 := range out.vs {
   252  			exit := uf.enter("%d", i)
   253  			z, e2, err := v1.unify(t.vs[i], e, swap, uf)
   254  			exit.exit()
   255  			if err != nil {
   256  				return nil, envSet{}, err
   257  			} else if z.Domain == nil {
   258  				return nil, bottomEnv, nil
   259  			}
   260  			zs[i] = z
   261  			e = e2
   262  		}
   263  		out = Tuple{vs: zs}
   264  	}
   265  
   266  	return out, e, nil
   267  }
   268  
   269  // doRepeat creates a fixed-length tuple from a repeated tuple. The caller is
   270  // expected to unify the returned tuples.
   271  func (v Tuple) doRepeat(e envSet, n int) ([]Tuple, envSet) {
   272  	res := make([]Tuple, len(v.repeat))
   273  	for i, gen := range v.repeat {
   274  		res[i].vs = make([]*Value, n)
   275  		for j := range n {
   276  			res[i].vs[j], e = gen(e)
   277  		}
   278  	}
   279  	return res, e
   280  }
   281  
   282  // unify intersects the domains of two [String]s. If it can prove that this
   283  // domain is empty, it returns nil (bottom).
   284  //
   285  // TODO: Consider splitting literals and regexps into two domains.
   286  func (v String) unify(w String) Domain {
   287  	// Unification is symmetric, so put them in order of string kind so we only
   288  	// have to deal with half the cases.
   289  	if v.kind > w.kind {
   290  		v, w = w, v
   291  	}
   292  
   293  	switch v.kind {
   294  	case stringRegex:
   295  		switch w.kind {
   296  		case stringRegex:
   297  			// Construct a match against all of the regexps
   298  			return String{kind: stringRegex, re: slices.Concat(v.re, w.re)}
   299  		case stringExact:
   300  			for _, re := range v.re {
   301  				if !re.MatchString(w.exact) {
   302  					return nil
   303  				}
   304  			}
   305  			return w
   306  		}
   307  	case stringExact:
   308  		if v.exact != w.exact {
   309  			return nil
   310  		}
   311  		return v
   312  	}
   313  	panic("bad string kind")
   314  }
   315  
   316  func concat[T any](s1, s2 []T) []T {
   317  	// Reuse s1 or s2 if possible.
   318  	if len(s1) == 0 {
   319  		return s2
   320  	}
   321  	return append(s1[:len(s1):len(s1)], s2...)
   322  }
   323  

View as plain text