Source file src/simd/slicepart_amd64.go

     1  // Copyright 2025 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build goexperiment.simd
     6  
     7  package simd
     8  
     9  import "unsafe"
    10  
    11  // Implementation of all the {Int,Uint}{8,16} load and store slice part
    12  // functions and methods for 128-bit and 256-bit vectors.
    13  
    14  /* pointer-punning functions for chunked slice part loads. */
    15  
    16  func int16atP8(p *int8) *int16 {
    17  	return (*int16)(unsafe.Pointer(p))
    18  }
    19  
    20  func int32atP8(p *int8) *int32 {
    21  	return (*int32)(unsafe.Pointer(p))
    22  }
    23  
    24  func int64atP8(p *int8) *int64 {
    25  	return (*int64)(unsafe.Pointer(p))
    26  }
    27  
    28  func int32atP16(p *int16) *int32 {
    29  	return (*int32)(unsafe.Pointer(p))
    30  }
    31  
    32  func int64atP16(p *int16) *int64 {
    33  	return (*int64)(unsafe.Pointer(p))
    34  }
    35  
    36  func int64atP32(p *int32) *int64 {
    37  	return (*int64)(unsafe.Pointer(p))
    38  }
    39  
    40  func int32atP64(p *int64) *int32 {
    41  	return (*int32)(unsafe.Pointer(p))
    42  }
    43  
    44  /* These two masks are used by generated code */
    45  
    46  var vecMask64 = [16]int64{
    47  	-1, -1, -1, -1,
    48  	-1, -1, -1, -1,
    49  	0, 0, 0, 0,
    50  	0, 0, 0, 0,
    51  }
    52  
    53  var vecMask32 = [32]int32{
    54  	-1, -1, -1, -1,
    55  	-1, -1, -1, -1,
    56  	-1, -1, -1, -1,
    57  	-1, -1, -1, -1,
    58  	0, 0, 0, 0,
    59  	0, 0, 0, 0,
    60  	0, 0, 0, 0,
    61  	0, 0, 0, 0,
    62  }
    63  
    64  /* 256-bit int vector loads and stores made from 128-bit parts */
    65  
    66  // LoadInt8x32SlicePart loads a Int8x32 from the slice s.
    67  // If s has fewer than 32 elements, the remaining elements of the vector are filled with zeroes.
    68  // If s has 32 or more elements, the function is equivalent to LoadInt8x32Slice.
    69  func LoadInt8x32SlicePart(s []int8) Int8x32 {
    70  	l := len(s)
    71  	if l >= 32 {
    72  		return LoadInt8x32Slice(s)
    73  	}
    74  	var x Int8x32
    75  	if l == 0 {
    76  		return x
    77  	}
    78  	if l > 16 {
    79  		return x.SetLo(LoadInt8x16Slice(s)).SetHi(LoadInt8x16SlicePart(s[16:]))
    80  	} else {
    81  		return x.SetLo(LoadInt8x16SlicePart(s))
    82  	}
    83  }
    84  
    85  // LoadInt16x16SlicePart loads a Int16x16 from the slice s.
    86  // If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
    87  // If s has 16 or more elements, the function is equivalent to LoadInt16x16Slice.
    88  func LoadInt16x16SlicePart(s []int16) Int16x16 {
    89  	l := len(s)
    90  	if l >= 16 {
    91  		return LoadInt16x16Slice(s)
    92  	}
    93  	var x Int16x16
    94  	if l == 0 {
    95  		return x
    96  	}
    97  	if l > 8 {
    98  		return x.SetLo(LoadInt16x8Slice(s)).SetHi(LoadInt16x8SlicePart(s[8:]))
    99  	} else {
   100  		return x.SetLo(LoadInt16x8SlicePart(s))
   101  	}
   102  }
   103  
   104  // StoreSlicePart stores the elements of x into the slice s.
   105  // It stores as many elements as will fit in s.
   106  // If s has 32 or more elements, the method is equivalent to x.StoreSlice.
   107  func (x Int8x32) StoreSlicePart(s []int8) {
   108  	l := len(s)
   109  	if l >= 32 {
   110  		x.StoreSlice(s)
   111  		return
   112  	}
   113  	if l == 0 {
   114  		return
   115  	}
   116  	if l > 16 {
   117  		x.GetLo().StoreSlice(s)
   118  		x.GetHi().StoreSlicePart(s[16:])
   119  	} else { // fits in one
   120  		x.GetLo().StoreSlicePart(s)
   121  	}
   122  }
   123  
   124  // StoreSlicePart stores the elements of x into the slice s.
   125  // It stores as many elements as will fit in s.
   126  // If s has 16 or more elements, the method is equivalent to x.StoreSlice.
   127  func (x Int16x16) StoreSlicePart(s []int16) {
   128  	l := len(s)
   129  	if l >= 16 {
   130  		x.StoreSlice(s)
   131  		return
   132  	}
   133  	if l == 0 {
   134  		return
   135  	}
   136  	if l > 8 {
   137  		x.GetLo().StoreSlice(s)
   138  		x.GetHi().StoreSlicePart(s[8:])
   139  	} else { // fits in one
   140  		x.GetLo().StoreSlicePart(s)
   141  	}
   142  }
   143  
   144  /* 128-bit vector load and store slice parts for 8 and 16-bit int elements */
   145  
   146  // LoadInt8x16SlicePart loads a Int8x16 from the slice s.
   147  // If s has fewer than 16 elements, the remaining elements of the vector are filled with zeroes.
   148  // If s has 16 or more elements, the function is equivalent to LoadInt8x16Slice.
   149  func LoadInt8x16SlicePart(s []int8) Int8x16 {
   150  	l := len(s)
   151  	if l >= 16 {
   152  		return LoadInt8x16Slice(s)
   153  	}
   154  	var x Int8x16
   155  	if l == 0 {
   156  		return x
   157  	}
   158  	if l >= 8 { // 8-15
   159  		x = x.AsInt64x2().SetElem(0, *int64atP8(&s[0])).AsInt8x16()
   160  		if l >= 12 { // 12, 13, 14, 15
   161  			x = x.AsInt32x4().SetElem(8/4, *int32atP8(&s[8])).AsInt8x16()
   162  			if l >= 14 {
   163  				x = x.AsInt16x8().SetElem(12/2, *int16atP8(&s[12])).AsInt8x16()
   164  				if l == 15 {
   165  					x = x.SetElem(14, s[14])
   166  				}
   167  			} else if l == 13 {
   168  				x = x.SetElem(12, s[12])
   169  			}
   170  		} else if l >= 10 { // 10, 11
   171  			x = x.AsInt16x8().SetElem(8/2, *int16atP8(&s[8])).AsInt8x16()
   172  			if l == 11 {
   173  				x = x.SetElem(10, s[10])
   174  			}
   175  		} else if l == 9 {
   176  			x = x.SetElem(8, s[8])
   177  		}
   178  	} else if l >= 4 { // 4-7
   179  		x = x.AsInt32x4().SetElem(0, *int32atP8(&s[0])).AsInt8x16()
   180  		if l >= 6 {
   181  			x = x.AsInt16x8().SetElem(4/2, *int16atP8(&s[4])).AsInt8x16()
   182  			if l == 7 {
   183  				x = x.SetElem(6, s[6])
   184  			}
   185  		} else if l == 5 {
   186  			x = x.SetElem(4, s[4])
   187  		}
   188  	} else if l >= 2 { // 2,3
   189  		x = x.AsInt16x8().SetElem(0, *int16atP8(&s[0])).AsInt8x16()
   190  		if l == 3 {
   191  			x = x.SetElem(2, s[2])
   192  		}
   193  	} else { // l == 1
   194  		x = x.SetElem(0, s[0])
   195  	}
   196  	return x
   197  }
   198  
   199  // StoreSlicePart stores the elements of x into the slice s.
   200  // It stores as many elements as will fit in s.
   201  // If s has 16 or more elements, the method is equivalent to x.StoreSlice.
   202  func (x Int8x16) StoreSlicePart(s []int8) {
   203  	l := len(s)
   204  	if l >= 16 {
   205  		x.StoreSlice(s)
   206  		return
   207  	}
   208  	if l == 0 {
   209  		return
   210  	}
   211  	if l >= 8 { // 8-15
   212  		*int64atP8(&s[0]) = x.AsInt64x2().GetElem(0)
   213  		if l >= 12 { // 12, 13, 14, 15
   214  			*int32atP8(&s[8]) = x.AsInt32x4().GetElem(8 / 4)
   215  			if l >= 14 {
   216  				*int16atP8(&s[12]) = x.AsInt16x8().GetElem(12 / 2)
   217  				if l == 15 {
   218  					s[14] = x.GetElem(14)
   219  				}
   220  			} else if l == 13 {
   221  				s[12] = x.GetElem(12)
   222  			}
   223  		} else if l >= 10 { // 10, 11
   224  			*int16atP8(&s[8]) = x.AsInt16x8().GetElem(8 / 2)
   225  			if l == 11 {
   226  				s[10] = x.GetElem(10)
   227  			}
   228  		} else if l == 9 {
   229  			s[8] = x.GetElem(8)
   230  		}
   231  	} else if l >= 4 { // 4-7
   232  		*int32atP8(&s[0]) = x.AsInt32x4().GetElem(0)
   233  		if l >= 6 {
   234  			*int16atP8(&s[4]) = x.AsInt16x8().GetElem(4 / 2)
   235  			if l == 7 {
   236  				s[6] = x.GetElem(6)
   237  			}
   238  		} else if l == 5 {
   239  			s[4] = x.GetElem(4)
   240  		}
   241  	} else if l >= 2 { // 2,3
   242  		*int16atP8(&s[0]) = x.AsInt16x8().GetElem(0)
   243  		if l == 3 {
   244  			s[2] = x.GetElem(2)
   245  		}
   246  	} else { // l == 1
   247  		s[0] = x.GetElem(0)
   248  	}
   249  }
   250  
   251  // LoadInt16x8SlicePart loads a Int16x8 from the slice s.
   252  // If s has fewer than 8 elements, the remaining elements of the vector are filled with zeroes.
   253  // If s has 8 or more elements, the function is equivalent to LoadInt16x8Slice.
   254  func LoadInt16x8SlicePart(s []int16) Int16x8 {
   255  	l := len(s)
   256  	if l >= 8 {
   257  		return LoadInt16x8Slice(s)
   258  	}
   259  	var x Int16x8
   260  	if l == 0 {
   261  		return x
   262  	}
   263  	if l >= 4 { // 4-7
   264  		x = x.AsInt64x2().SetElem(0, *int64atP16(&s[0])).AsInt16x8()
   265  		if l >= 6 {
   266  			x = x.AsInt32x4().SetElem(4/2, *int32atP16(&s[4])).AsInt16x8()
   267  			if l == 7 {
   268  				x = x.SetElem(6, s[6])
   269  			}
   270  		} else if l == 5 {
   271  			x = x.SetElem(4, s[4])
   272  		}
   273  	} else if l >= 2 { // 2,3
   274  		x = x.AsInt32x4().SetElem(0, *int32atP16(&s[0])).AsInt16x8()
   275  		if l == 3 {
   276  			x = x.SetElem(2, s[2])
   277  		}
   278  	} else { // l == 1
   279  		x = x.SetElem(0, s[0])
   280  	}
   281  	return x
   282  }
   283  
   284  // StoreSlicePart stores the elements of x into the slice s.
   285  // It stores as many elements as will fit in s.
   286  // If s has 8 or more elements, the method is equivalent to x.StoreSlice.
   287  func (x Int16x8) StoreSlicePart(s []int16) {
   288  	l := len(s)
   289  	if l >= 8 {
   290  		x.StoreSlice(s)
   291  		return
   292  	}
   293  	if l == 0 {
   294  		return
   295  	}
   296  	if l >= 4 { // 4-7
   297  		*int64atP16(&s[0]) = x.AsInt64x2().GetElem(0)
   298  		if l >= 6 {
   299  			*int32atP16(&s[4]) = x.AsInt32x4().GetElem(4 / 2)
   300  			if l == 7 {
   301  				s[6] = x.GetElem(6)
   302  			}
   303  		} else if l == 5 {
   304  			s[4] = x.GetElem(4)
   305  		}
   306  	} else if l >= 2 { // 2,3
   307  		*int32atP16(&s[0]) = x.AsInt32x4().GetElem(0)
   308  		if l == 3 {
   309  			s[2] = x.GetElem(2)
   310  		}
   311  	} else { // l == 1
   312  		s[0] = x.GetElem(0)
   313  	}
   314  	return
   315  }
   316  

View as plain text