Source file src/simd/archsimd/slicepart_128.go

     1  // Copyright 2026 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd && (amd64 || arm64 || wasm)
     6  
     7  package archsimd
     8  
     9  import "unsafe"
    10  
// Implementation of all the {Int,Uint}{8,16} load and store part
// functions and methods for 128-bit vectors, for architectures that must
// do that by pieces.
    13  
    14  /* pointer-punning functions for chunked part-of-slice loads. */
    15  
    16  func int16atP8(p *int8) *int16 {
    17  	return (*int16)(unsafe.Pointer(p))
    18  }
    19  
    20  func int32atP8(p *int8) *int32 {
    21  	return (*int32)(unsafe.Pointer(p))
    22  }
    23  
    24  func int64atP8(p *int8) *int64 {
    25  	return (*int64)(unsafe.Pointer(p))
    26  }
    27  
    28  func int32atP16(p *int16) *int32 {
    29  	return (*int32)(unsafe.Pointer(p))
    30  }
    31  
    32  func int64atP16(p *int16) *int64 {
    33  	return (*int64)(unsafe.Pointer(p))
    34  }
    35  
    36  func int64atP32(p *int32) *int64 {
    37  	return (*int64)(unsafe.Pointer(p))
    38  }
    39  
    40  func int32atP64(p *int64) *int32 {
    41  	return (*int32)(unsafe.Pointer(p))
    42  }
    43  
    44  func uint16atP8(p *uint8) *uint16 {
    45  	return (*uint16)(unsafe.Pointer(p))
    46  }
    47  
    48  func uint32atP8(p *uint8) *uint32 {
    49  	return (*uint32)(unsafe.Pointer(p))
    50  }
    51  
    52  func uint64atP8(p *uint8) *uint64 {
    53  	return (*uint64)(unsafe.Pointer(p))
    54  }
    55  
    56  func uint32atP16(p *uint16) *uint32 {
    57  	return (*uint32)(unsafe.Pointer(p))
    58  }
    59  
    60  func uint64atP16(p *uint16) *uint64 {
    61  	return (*uint64)(unsafe.Pointer(p))
    62  }
    63  
    64  func uint64atP32(p *uint32) *uint64 {
    65  	return (*uint64)(unsafe.Pointer(p))
    66  }
    67  
    68  func uint32atP64(p *uint64) *uint32 {
    69  	return (*uint32)(unsafe.Pointer(p))
    70  }
    71  
    72  func float64atP32(p *float32) *float64 {
    73  	return (*float64)(unsafe.Pointer(p))
    74  }
    75  
    76  func float32atP64(p *float64) *float32 {
    77  	return (*float32)(unsafe.Pointer(p))
    78  }
    79  
    80  /* 128-bit vector load and store slice parts for 8 and 16-bit int elements */
    81  
    82  // LoadUint8x16Part loads a Uint8x16 from the slice s.
    83  // If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
    84  // If s has 16 or more elements, the function is equivalent to LoadInt8x16.
    85  func LoadUint8x16Part(s []uint8) (Uint8x16, int) {
    86  	l := len(s)
    87  	if l >= 16 {
    88  		return LoadUint8x16(s), 16
    89  	}
    90  	var x Uint8x16
    91  	if l == 0 {
    92  		return x, 0
    93  	}
    94  	if l >= 8 { // 8-15
    95  		x = x.ReshapeToUint64s().SetElem(0, *uint64atP8(&s[0])).ReshapeToUint8s()
    96  		if l >= 12 { // 12, 13, 14, 15
    97  			x = x.ReshapeToUint32s().SetElem(8/4, *uint32atP8(&s[8])).ReshapeToUint8s()
    98  			if l >= 14 {
    99  				x = x.ReshapeToUint16s().SetElem(12/2, *uint16atP8(&s[12])).ReshapeToUint8s()
   100  				if l == 15 {
   101  					x = x.SetElem(14, s[14])
   102  				}
   103  			} else if l == 13 {
   104  				x = x.SetElem(12, s[12])
   105  			}
   106  		} else if l >= 10 { // 10, 11
   107  			x = x.ReshapeToUint16s().SetElem(8/2, *uint16atP8(&s[8])).ReshapeToUint8s()
   108  			if l == 11 {
   109  				x = x.SetElem(10, s[10])
   110  			}
   111  		} else if l == 9 {
   112  			x = x.SetElem(8, s[8])
   113  		}
   114  	} else if l >= 4 { // 4-7
   115  		x = x.ReshapeToUint32s().SetElem(0, *uint32atP8(&s[0])).ReshapeToUint8s()
   116  		if l >= 6 {
   117  			x = x.ReshapeToUint16s().SetElem(4/2, *uint16atP8(&s[4])).ReshapeToUint8s()
   118  			if l == 7 {
   119  				x = x.SetElem(6, s[6])
   120  			}
   121  		} else if l == 5 {
   122  			x = x.SetElem(4, s[4])
   123  		}
   124  	} else if l >= 2 { // 2,3
   125  		x = x.ReshapeToUint16s().SetElem(0, *uint16atP8(&s[0])).ReshapeToUint8s()
   126  		if l == 3 {
   127  			x = x.SetElem(2, s[2])
   128  		}
   129  	} else { // l == 1
   130  		x = x.SetElem(0, s[0])
   131  	}
   132  	return x, l
   133  }
   134  
   135  // StorePart stores the elements of x into the slice s.
   136  // It stores as many elements as will fit in s.
   137  // If s has 16 or more elements, the method is equivalent to x.Store.
   138  func (x Uint8x16) StorePart(s []uint8) int {
   139  	l := len(s)
   140  	if l >= 16 {
   141  		x.Store(s)
   142  		return 16
   143  	}
   144  	if l == 0 {
   145  		return 0
   146  	}
   147  	if l >= 8 { // 8-15
   148  		*uint64atP8(&s[0]) = x.ReshapeToUint64s().GetElem(0)
   149  		if l >= 12 { // 12, 13, 14, 15
   150  			*uint32atP8(&s[8]) = x.ReshapeToUint32s().GetElem(8 / 4)
   151  			if l >= 14 {
   152  				*uint16atP8(&s[12]) = x.ReshapeToUint16s().GetElem(12 / 2)
   153  				if l == 15 {
   154  					s[14] = x.GetElem(14)
   155  				}
   156  			} else if l == 13 {
   157  				s[12] = x.GetElem(12)
   158  			}
   159  		} else if l >= 10 { // 10, 11
   160  			*uint16atP8(&s[8]) = x.ReshapeToUint16s().GetElem(8 / 2)
   161  			if l == 11 {
   162  				s[10] = x.GetElem(10)
   163  			}
   164  		} else if l == 9 {
   165  			s[8] = x.GetElem(8)
   166  		}
   167  	} else if l >= 4 { // 4-7
   168  		*uint32atP8(&s[0]) = x.ReshapeToUint32s().GetElem(0)
   169  		if l >= 6 {
   170  			*uint16atP8(&s[4]) = x.ReshapeToUint16s().GetElem(4 / 2)
   171  			if l == 7 {
   172  				s[6] = x.GetElem(6)
   173  			}
   174  		} else if l == 5 {
   175  			s[4] = x.GetElem(4)
   176  		}
   177  	} else if l >= 2 { // 2,3
   178  		*uint16atP8(&s[0]) = x.ReshapeToUint16s().GetElem(0)
   179  		if l == 3 {
   180  			s[2] = x.GetElem(2)
   181  		}
   182  	} else { // l == 1
   183  		s[0] = x.GetElem(0)
   184  	}
   185  	return l
   186  }
   187  
   188  // LoadUint16x8Part loads a Uint16x8 from the slice s.
   189  // If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
   190  // If s has 8 or more elements, the function is equivalent to LoadInt16x8.
   191  func LoadUint16x8Part(s []uint16) (Uint16x8, int) {
   192  	l := len(s)
   193  	if l >= 8 {
   194  		return LoadUint16x8(s), 8
   195  	}
   196  	var x Uint16x8
   197  	if l == 0 {
   198  		return x, 0
   199  	}
   200  	if l >= 4 { // 4-7
   201  		x = x.ReshapeToUint64s().SetElem(0, *uint64atP16(&s[0])).ReshapeToUint16s()
   202  		if l >= 6 {
   203  			x = x.ReshapeToUint32s().SetElem(4/2, *uint32atP16(&s[4])).ReshapeToUint16s()
   204  			if l == 7 {
   205  				x = x.SetElem(6, s[6])
   206  			}
   207  		} else if l == 5 {
   208  			x = x.SetElem(4, s[4])
   209  		}
   210  	} else if l >= 2 { // 2,3
   211  		x = x.ReshapeToUint32s().SetElem(0, *uint32atP16(&s[0])).ReshapeToUint16s()
   212  		if l == 3 {
   213  			x = x.SetElem(2, s[2])
   214  		}
   215  	} else { // l == 1
   216  		x = x.SetElem(0, s[0])
   217  	}
   218  	return x, l
   219  }
   220  
   221  // StorePart stores the elements of x into the slice s.
   222  // It stores as many elements as will fit in s.
   223  // If s has 8 or more elements, the method is equivalent to x.Store.
   224  func (x Uint16x8) StorePart(s []uint16) int {
   225  	l := len(s)
   226  	if l >= 8 {
   227  		x.Store(s)
   228  		return 8
   229  	}
   230  	if l == 0 {
   231  		return 0
   232  	}
   233  	if l >= 4 { // 4-7
   234  		*uint64atP16(&s[0]) = x.ReshapeToUint64s().GetElem(0)
   235  		if l >= 6 {
   236  			*uint32atP16(&s[4]) = x.ReshapeToUint32s().GetElem(4 / 2)
   237  			if l == 7 {
   238  				s[6] = x.GetElem(6)
   239  			}
   240  		} else if l == 5 {
   241  			s[4] = x.GetElem(4)
   242  		}
   243  	} else if l >= 2 { // 2,3
   244  		*uint32atP16(&s[0]) = x.ReshapeToUint32s().GetElem(0)
   245  		if l == 3 {
   246  			s[2] = x.GetElem(2)
   247  		}
   248  	} else { // l == 1
   249  		s[0] = x.GetElem(0)
   250  	}
   251  	return l
   252  }
   253  
   254  // LoadInt8x16Part loads a Int8x16 from the slice s, it returns the loaded vector and the
   255  // number of elements loaded.
   256  // If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
   257  // If s has 16 or more elements, the function is equivalent to LoadInt8x16.
   258  func LoadInt8x16Part(s []int8) (Int8x16, int) {
   259  	if len(s) == 0 {
   260  		var zero Int8x16
   261  		return zero, 0
   262  	}
   263  	t := unsafe.Slice((*uint8)(unsafe.Pointer(&s[0])), len(s))
   264  	v, l := LoadUint8x16Part(t)
   265  	return v.BitsToInt8(), l
   266  }
   267  
   268  // StorePart stores the 16 elements of x into the slice s.
   269  // It stores as many elements as will fit in s.
   270  // If s has 16 or more elements, the method is equivalent to x.Store.
   271  func (x Int8x16) StorePart(s []int8) int {
   272  	if len(s) == 0 {
   273  		return 0
   274  	}
   275  	t := unsafe.Slice((*uint8)(unsafe.Pointer(&s[0])), len(s))
   276  	return x.ToBits().StorePart(t)
   277  }
   278  
   279  // LoadInt16x8Part loads a Int16x8 from the slice s, it returns the loaded vector and the
   280  // number of elements loaded.
   281  // If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
   282  // If s has 8 or more elements, the function is equivalent to LoadInt16x8.
   283  func LoadInt16x8Part(s []int16) (Int16x8, int) {
   284  	if len(s) == 0 {
   285  		var zero Int16x8
   286  		return zero, 0
   287  	}
   288  	t := unsafe.Slice((*uint16)(unsafe.Pointer(&s[0])), len(s))
   289  	v, l := LoadUint16x8Part(t)
   290  	return v.BitsToInt16(), l
   291  }
   292  
   293  // StorePart stores the 8 elements of x into the slice s.
   294  // It stores as many elements as will fit in s.
   295  // If s has 8 or more elements, the method is equivalent to x.Store.
   296  func (x Int16x8) StorePart(s []int16) int {
   297  	if len(s) == 0 {
   298  		return 0
   299  	}
   300  	t := unsafe.Slice((*uint16)(unsafe.Pointer(&s[0])), len(s))
   301  	return x.ToBits().StorePart(t)
   302  }
   303  

View as plain text