Veritable Lasagna
An Allocator & Data Structure Library for C.
Loading...
Searching...
No Matches
vl_simd_portable.c
Go to the documentation of this file.
1
#include <stdint.h>
#include <string.h>
#include <vl/vl_simd.h>
8
9static vl_simd_vec4_f32 vlSIMDLoadVec4F32Portable(const vl_float32_t* ptr)
10{
11 vl_simd_vec4_f32 result;
12 memcpy(result.components, ptr, sizeof(result.components));
13 return result;
14}
15
16static void vlSIMDStoreVec4F32Portable(vl_float32_t* ptr, vl_simd_vec4_f32 v)
17{
18 memcpy(ptr, v.components, sizeof(v.components));
19}
20
21static vl_simd_vec4_f32 vlSIMDSplatVec4F32Portable(vl_float32_t scalar)
22{
23 vl_simd_vec4_f32 result;
24 for (int i = 0; i < 4; i++)
25 result.components[i] = scalar;
26 return result;
27}
28
29static vl_simd_vec4_f32 vlSIMDAddVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
30{
31 vl_simd_vec4_f32 result;
32 for (int i = 0; i < 4; i++)
33 result.components[i] = a.components[i] + b.components[i];
34 return result;
35}
36
37static vl_simd_vec4_f32 vlSIMDSubVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
38{
39 vl_simd_vec4_f32 result;
40 for (int i = 0; i < 4; i++)
41 result.components[i] = a.components[i] - b.components[i];
42 return result;
43}
44
45static vl_simd_vec4_f32 vlSIMDMulVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
46{
47 vl_simd_vec4_f32 result;
48 for (int i = 0; i < 4; i++)
49 result.components[i] = a.components[i] * b.components[i];
50 return result;
51}
52
53static vl_simd_vec4_f32 vlSIMDDivVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
54{
55 vl_simd_vec4_f32 result;
56 for (int i = 0; i < 4; i++)
57 result.components[i] = a.components[i] / b.components[i];
58 return result;
59}
60
61static vl_simd_vec4_f32 vlSIMDFmaVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b, vl_simd_vec4_f32 c)
62{
63 vl_simd_vec4_f32 result;
64 for (int i = 0; i < 4; i++)
65 result.components[i] = a.components[i] * b.components[i] + c.components[i];
66 return result;
67}
68
69static vl_float32_t vlSIMDHsumVec4F32Portable(vl_simd_vec4_f32 v)
70{
71 return v.components[0] + v.components[1] + v.components[2] + v.components[3];
72}
73
74static vl_simd_vec8_f32 vlSIMDLoadVec8F32Portable(const vl_float32_t* ptr)
75{
76 vl_simd_vec8_f32 result;
77 memcpy(result.components, ptr, sizeof(result.components));
78 return result;
79}
80
81static void vlSIMDStoreVec8F32Portable(vl_float32_t* ptr, vl_simd_vec8_f32 v)
82{
83 memcpy(ptr, v.components, sizeof(v.components));
84}
85
86static vl_simd_vec8_f32 vlSIMDAddVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
87{
88 vl_simd_vec8_f32 result;
89 for (int i = 0; i < 8; i++)
90 result.components[i] = a.components[i] + b.components[i];
91 return result;
92}
93
94static vl_simd_vec8_f32 vlSIMDMulVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
95{
96 vl_simd_vec8_f32 result;
97 for (int i = 0; i < 8; i++)
98 result.components[i] = a.components[i] * b.components[i];
99 return result;
100}
101
102static vl_simd_vec8_f32 vlSIMDFmaVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b, vl_simd_vec8_f32 c)
103{
104 vl_simd_vec8_f32 result;
105 for (int i = 0; i < 8; i++)
106 result.components[i] = a.components[i] * b.components[i] + c.components[i];
107 return result;
108}
109
110static vl_simd_vec4_f32 vlSIMDCmpPortable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b,
111 int (*cmp_fn)(vl_float32_t, vl_float32_t))
112{
113 vl_simd_vec4_f32 result;
114 for (int i = 0; i < 4; i++)
115 {
116 vl_uint32_t mask = cmp_fn(a.components[i], b.components[i]) ? 0xFFFFFFFFu : 0u;
117 memcpy(&result.components[i], &mask, sizeof(vl_float32_t));
118 }
119 return result;
120}
121
122static int vlSIMDLtCmp(vl_float32_t a, vl_float32_t b) { return a < b; }
123static int vlSIMDGtCmp(vl_float32_t a, vl_float32_t b) { return a > b; }
124static int vlSIMDEqCmp(vl_float32_t a, vl_float32_t b) { return a == b; }
125
126static vl_simd_vec4_f32 vlSIMDLtVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
127{
128 return vlSIMDCmpPortable(a, b, vlSIMDLtCmp);
129}
130
131static vl_simd_vec4_f32 vlSIMDGtVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
132{
133 return vlSIMDCmpPortable(a, b, vlSIMDGtCmp);
134}
135
136static vl_simd_vec4_f32 vlSIMDEqVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
137{
138 return vlSIMDCmpPortable(a, b, vlSIMDEqCmp);
139}
140
141static vl_simd_vec4_f32 vlSIMDAndVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
142{
143 vl_simd_vec4_f32 result;
144 for (int i = 0; i < 4; i++)
145 {
146 vl_uint32_t ua, ub, ur;
147 memcpy(&ua, &a.components[i], sizeof(ua));
148 memcpy(&ub, &b.components[i], sizeof(ub));
149 ur = ua & ub;
150 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
151 }
152 return result;
153}
154
155static vl_simd_vec4_f32 vlSIMDOrVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
156{
157 vl_simd_vec4_f32 result;
158 for (int i = 0; i < 4; i++)
159 {
160 vl_uint32_t ua, ub, ur;
161 memcpy(&ua, &a.components[i], sizeof(ua));
162 memcpy(&ub, &b.components[i], sizeof(ub));
163 ur = ua | ub;
164 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
165 }
166 return result;
167}
168
169static vl_simd_vec4_f32 vlSIMDXorVec4F32Portable(vl_simd_vec4_f32 a, vl_simd_vec4_f32 b)
170{
171 vl_simd_vec4_f32 result;
172 for (int i = 0; i < 4; i++)
173 {
174 vl_uint32_t ua, ub, ur;
175 memcpy(&ua, &a.components[i], sizeof(ua));
176 memcpy(&ub, &b.components[i], sizeof(ub));
177 ur = ua ^ ub;
178 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
179 }
180 return result;
181}
182
183static vl_simd_vec4_f32 vlSIMDNotVec4F32Portable(vl_simd_vec4_f32 a)
184{
185 vl_simd_vec4_f32 result;
186 for (int i = 0; i < 4; i++)
187 {
188 vl_uint32_t ua, ur;
189 memcpy(&ua, &a.components[i], sizeof(ua));
190 ur = ~ua;
191 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
192 }
193 return result;
194}
195
196/* Horizontal reductions */
197static vl_float32_t vlSIMDHmaxVec4F32Portable(vl_simd_vec4_f32 v)
198{
199 vl_float32_t max_val = v.components[0];
200 for (int i = 1; i < 4; i++)
201 {
202 if (v.components[i] > max_val)
203 max_val = v.components[i];
204 }
205 return max_val;
206}
207
208static vl_float32_t vlSIMDHminVec4F32Portable(vl_simd_vec4_f32 v)
209{
210 vl_float32_t min_val = v.components[0];
211 for (int i = 1; i < 4; i++)
212 {
213 if (v.components[i] < min_val)
214 min_val = v.components[i];
215 }
216 return min_val;
217}
218
219static vl_float32_t vlSIMDHprodVec4F32Portable(vl_simd_vec4_f32 v)
220{
221 return v.components[0] * v.components[1] * v.components[2] * v.components[3];
222}
223
224/* Lane operations */
225static vl_float32_t vlSIMDExtractLaneVec4F32Portable(vl_simd_vec4_f32 v, int lane)
226{
227 return v.components[lane & 3]; // Clamp to 0-3
228}
229
230static vl_simd_vec4_f32 vlSIMDBroadcastLaneVec4F32Portable(vl_simd_vec4_f32 v, int lane)
231{
232 vl_float32_t val = v.components[lane & 3];
233 return vlSIMDSplatVec4F32Portable(val);
234}
235
236/* Integer 32-bit */
237static vl_simd_vec4_i32 vlSIMDLoadVec4I32Portable(const vl_int32_t* ptr)
238{
239 vl_simd_vec4_i32 result;
240 result.components[0] = ptr[0];
241 result.components[1] = ptr[1];
242 result.components[2] = ptr[2];
243 result.components[3] = ptr[3];
244 return result;
245}
246
247static void vlSIMDStoreVec4I32Portable(vl_int32_t* ptr, vl_simd_vec4_i32 v)
248{
249 ptr[0] = v.components[0];
250 ptr[1] = v.components[1];
251 ptr[2] = v.components[2];
252 ptr[3] = v.components[3];
253}
254
255static vl_simd_vec4_i32 vlSIMDAddVec4I32Portable(vl_simd_vec4_i32 a, vl_simd_vec4_i32 b)
256{
257 vl_simd_vec4_i32 result;
258 for (int i = 0; i < 4; i++)
259 {
260 result.components[i] = a.components[i] + b.components[i];
261 }
262 return result;
263}
264
265static vl_simd_vec4_i32 vlSIMDMulVec4I32Portable(vl_simd_vec4_i32 a, vl_simd_vec4_i32 b)
266{
267 vl_simd_vec4_i32 result;
268 for (int i = 0; i < 4; i++)
269 {
270 result.components[i] = a.components[i] * b.components[i];
271 }
272 return result;
273}
274
275static vl_simd_vec8_f32 vlSIMDSplatVec8F32Portable(vl_float32_t scalar)
276{
277 vl_simd_vec8_f32 result;
278 for (int i = 0; i < 8; i++)
279 result.components[i] = scalar;
280 return result;
281}
282
283static vl_simd_vec8_f32 vlSIMDSubVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
284{
285 vl_simd_vec8_f32 result;
286 for (int i = 0; i < 8; i++)
287 result.components[i] = a.components[i] - b.components[i];
288 return result;
289}
290
291static vl_simd_vec8_f32 vlSIMDLtVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
292{
293 vl_simd_vec8_f32 result;
294 for (int i = 0; i < 8; i++)
295 {
296 vl_uint32_t mask = a.components[i] < b.components[i] ? 0xFFFFFFFFu : 0u;
297 memcpy(&result.components[i], &mask, sizeof(vl_float32_t));
298 }
299 return result;
300}
301
302static vl_simd_vec8_f32 vlSIMDGtVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
303{
304 vl_simd_vec8_f32 result;
305 for (int i = 0; i < 8; i++)
306 {
307 vl_uint32_t mask = a.components[i] > b.components[i] ? 0xFFFFFFFFu : 0u;
308 memcpy(&result.components[i], &mask, sizeof(vl_float32_t));
309 }
310 return result;
311}
312
313static vl_simd_vec8_f32 vlSIMDEqVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
314{
315 vl_simd_vec8_f32 result;
316 for (int i = 0; i < 8; i++)
317 {
318 vl_uint32_t mask = a.components[i] == b.components[i] ? 0xFFFFFFFFu : 0u;
319 memcpy(&result.components[i], &mask, sizeof(vl_float32_t));
320 }
321 return result;
322}
323
324static vl_simd_vec8_f32 vlSIMDAndVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
325{
326 vl_simd_vec8_f32 result;
327 for (int i = 0; i < 8; i++)
328 {
329 vl_uint32_t ua, ub, ur;
330 memcpy(&ua, &a.components[i], sizeof(ua));
331 memcpy(&ub, &b.components[i], sizeof(ub));
332 ur = ua & ub;
333 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
334 }
335 return result;
336}
337
338static vl_simd_vec8_f32 vlSIMDOrVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
339{
340 vl_simd_vec8_f32 result;
341 for (int i = 0; i < 8; i++)
342 {
343 vl_uint32_t ua, ub, ur;
344 memcpy(&ua, &a.components[i], sizeof(ua));
345 memcpy(&ub, &b.components[i], sizeof(ub));
346 ur = ua | ub;
347 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
348 }
349 return result;
350}
351
352static vl_simd_vec8_f32 vlSIMDXorVec8F32Portable(vl_simd_vec8_f32 a, vl_simd_vec8_f32 b)
353{
354 vl_simd_vec8_f32 result;
355 for (int i = 0; i < 8; i++)
356 {
357 vl_uint32_t ua, ub, ur;
358 memcpy(&ua, &a.components[i], sizeof(ua));
359 memcpy(&ub, &b.components[i], sizeof(ub));
360 ur = ua ^ ub;
361 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
362 }
363 return result;
364}
365
366static vl_simd_vec8_f32 vlSIMDNotVec8F32Portable(vl_simd_vec8_f32 a)
367{
368 vl_simd_vec8_f32 result;
369 for (int i = 0; i < 8; i++)
370 {
371 vl_uint32_t ua, ur;
372 memcpy(&ua, &a.components[i], sizeof(ua));
373 ur = ~ua;
374 memcpy(&result.components[i], &ur, sizeof(result.components[i]));
375 }
376 return result;
377}
378
379static vl_simd_vec8_i16 vlSIMDLoadVec8I16Portable(const vl_int16_t* ptr)
380{
381 vl_simd_vec8_i16 result;
382 for (int i = 0; i < 8; i++)
383 {
384 result.components[i] = ptr[i];
385 }
386 return result;
387}
388
389static void vlSIMDStoreVec8I16Portable(vl_int16_t* ptr, vl_simd_vec8_i16 v)
390{
391 for (int i = 0; i < 8; i++)
392 {
393 ptr[i] = v.components[i];
394 }
395}
396
397static vl_simd_vec8_i16 vlSIMDAddVec8I16Portable(vl_simd_vec8_i16 a, vl_simd_vec8_i16 b)
398{
399 vl_simd_vec8_i16 result;
400 for (int i = 0; i < 8; i++)
401 {
402 result.components[i] = a.components[i] + b.components[i];
403 }
404 return result;
405}
406
407/* Integer 8-bit */
408static vl_simd_vec32_u8 vlSIMDLoadVec32U8Portable(const vl_uint8_t* ptr)
409{
410 vl_simd_vec32_u8 result;
411 for (int i = 0; i < 32; i++)
412 {
413 result.components[i] = ptr[i];
414 }
415 return result;
416}
417
418static void vlSIMDStoreVec32U8Portable(vl_uint8_t* ptr, vl_simd_vec32_u8 v)
419{
420 for (int i = 0; i < 32; i++)
421 {
422 ptr[i] = v.components[i];
423 }
424}
425
426static void vlSIMDInitPortable(void)
427{
428 vlSIMDFunctions.load_vec4f32 = vlSIMDLoadVec4F32Portable;
429 vlSIMDFunctions.store_vec4f32 = vlSIMDStoreVec4F32Portable;
430 vlSIMDFunctions.splat_vec4f32 = vlSIMDSplatVec4F32Portable;
431 vlSIMDFunctions.add_vec4f32 = vlSIMDAddVec4F32Portable;
432 vlSIMDFunctions.sub_vec4f32 = vlSIMDSubVec4F32Portable;
433 vlSIMDFunctions.mul_vec4f32 = vlSIMDMulVec4F32Portable;
434 vlSIMDFunctions.div_vec4f32 = vlSIMDDivVec4F32Portable;
435 vlSIMDFunctions.fma_vec4f32 = vlSIMDFmaVec4F32Portable;
436 vlSIMDFunctions.hsum_vec4f32 = vlSIMDHsumVec4F32Portable;
437 vlSIMDFunctions.load_vec8f32 = vlSIMDLoadVec8F32Portable;
438 vlSIMDFunctions.store_vec8f32 = vlSIMDStoreVec8F32Portable;
439 vlSIMDFunctions.add_vec8f32 = vlSIMDAddVec8F32Portable;
440 vlSIMDFunctions.mul_vec8f32 = vlSIMDMulVec8F32Portable;
441 vlSIMDFunctions.fma_vec8f32 = vlSIMDFmaVec8F32Portable;
442 vlSIMDFunctions.splat_vec8f32 = vlSIMDSplatVec8F32Portable;
443 vlSIMDFunctions.sub_vec8f32 = vlSIMDSubVec8F32Portable;
444 vlSIMDFunctions.lt_vec8f32 = vlSIMDLtVec8F32Portable;
445 vlSIMDFunctions.gt_vec8f32 = vlSIMDGtVec8F32Portable;
446 vlSIMDFunctions.eq_vec8f32 = vlSIMDEqVec8F32Portable;
447 vlSIMDFunctions.and_vec8f32 = vlSIMDAndVec8F32Portable;
448 vlSIMDFunctions.or_vec8f32 = vlSIMDOrVec8F32Portable;
449 vlSIMDFunctions.xor_vec8f32 = vlSIMDXorVec8F32Portable;
450 vlSIMDFunctions.not_vec8f32 = vlSIMDNotVec8F32Portable;
451 vlSIMDFunctions.lt_vec4f32 = vlSIMDLtVec4F32Portable;
452 vlSIMDFunctions.gt_vec4f32 = vlSIMDGtVec4F32Portable;
453 vlSIMDFunctions.eq_vec4f32 = vlSIMDEqVec4F32Portable;
454 vlSIMDFunctions.and_vec4f32 = vlSIMDAndVec4F32Portable;
455 vlSIMDFunctions.or_vec4f32 = vlSIMDOrVec4F32Portable;
456 vlSIMDFunctions.xor_vec4f32 = vlSIMDXorVec4F32Portable;
457 vlSIMDFunctions.not_vec4f32 = vlSIMDNotVec4F32Portable;
458 vlSIMDFunctions.hmax_vec4f32 = vlSIMDHmaxVec4F32Portable;
459 vlSIMDFunctions.hmin_vec4f32 = vlSIMDHminVec4F32Portable;
460 vlSIMDFunctions.hprod_vec4f32 = vlSIMDHprodVec4F32Portable;
461 vlSIMDFunctions.extract_lane_vec4f32 = vlSIMDExtractLaneVec4F32Portable;
462 vlSIMDFunctions.broadcast_lane_vec4f32 = vlSIMDBroadcastLaneVec4F32Portable;
463 vlSIMDFunctions.load_vec4i32 = vlSIMDLoadVec4I32Portable;
464 vlSIMDFunctions.store_vec4i32 = vlSIMDStoreVec4I32Portable;
465 vlSIMDFunctions.add_vec4i32 = vlSIMDAddVec4I32Portable;
466 vlSIMDFunctions.mul_vec4i32 = vlSIMDMulVec4I32Portable;
467 vlSIMDFunctions.load_vec8i16 = vlSIMDLoadVec8I16Portable;
468 vlSIMDFunctions.store_vec8i16 = vlSIMDStoreVec8I16Portable;
469 vlSIMDFunctions.add_vec8i16 = vlSIMDAddVec8I16Portable;
470 vlSIMDFunctions.load_vec32u8 = vlSIMDLoadVec32U8Portable;
471 vlSIMDFunctions.store_vec32u8 = vlSIMDStoreVec32U8Portable;
472 vlSIMDFunctions.backend_name = "Portable C";
473}
VL_F32_T vl_float32_t
32-bit floating point number type.
Definition vl_numtypes.h:173
vl_simd_functions_t vlSIMDFunctions
Global SIMD function table.
Definition vl_simd.c:34
Transparent runtime-selected SIMD abstraction layer.
vl_simd_hmin_vec4f32_fn hmin_vec4f32
Definition vl_simd.h:413
vl_simd_bitwise_vec4f32_fn or_vec4f32
Definition vl_simd.h:409
vl_simd_lt_vec8f32_fn lt_vec8f32
Definition vl_simd.h:398
vl_simd_or_vec8f32_fn or_vec8f32
Definition vl_simd.h:402
vl_simd_hprod_vec4f32_fn hprod_vec4f32
Definition vl_simd.h:414
vl_simd_cmp_vec4f32_fn eq_vec4f32
Definition vl_simd.h:407
vl_simd_bitwise_vec4f32_fn and_vec4f32
Definition vl_simd.h:408
vl_simd_not_vec4f32_fn not_vec4f32
Definition vl_simd.h:411
vl_simd_store_vec4i32_fn store_vec4i32
Definition vl_simd.h:418
vl_simd_fma_vec8f32_fn fma_vec8f32
Definition vl_simd.h:395
vl_simd_eq_vec8f32_fn eq_vec8f32
Definition vl_simd.h:400
vl_simd_store_vec8i16_fn store_vec8i16
Definition vl_simd.h:422
vl_simd_load_vec4f32_fn load_vec4f32
Definition vl_simd.h:382
vl_simd_sub_vec8f32_fn sub_vec8f32
Definition vl_simd.h:397
vl_simd_load_vec8i16_fn load_vec8i16
Definition vl_simd.h:421
vl_simd_load_vec32u8_fn load_vec32u8
Definition vl_simd.h:424
const char * backend_name
Backend name string for logging/debugging (e.g., "AVX2", "NEON64").
Definition vl_simd.h:429
vl_simd_broadcast_lane_vec4f32_fn broadcast_lane_vec4f32
Definition vl_simd.h:416
vl_simd_cmp_vec4f32_fn gt_vec4f32
Definition vl_simd.h:406
vl_simd_mul_vec8f32_fn mul_vec8f32
Definition vl_simd.h:394
vl_simd_store_vec8f32_fn store_vec8f32
Definition vl_simd.h:392
vl_simd_store_vec4f32_fn store_vec4f32
Definition vl_simd.h:383
vl_simd_add_vec8f32_fn add_vec8f32
Definition vl_simd.h:393
vl_simd_vec4_f32
Definition vl_simd.h:223
vl_simd_and_vec8f32_fn and_vec8f32
Definition vl_simd.h:401
vl_simd_splat_vec4f32_fn splat_vec4f32
Definition vl_simd.h:384
vl_simd_load_vec8f32_fn load_vec8f32
Definition vl_simd.h:391
vl_simd_add_vec4i32_fn add_vec4i32
Definition vl_simd.h:419
vl_simd_load_vec4i32_fn load_vec4i32
Definition vl_simd.h:417
vl_simd_sub_vec4f32_fn sub_vec4f32
Definition vl_simd.h:386
vl_simd_hmax_vec4f32_fn hmax_vec4f32
Definition vl_simd.h:412
vl_simd_add_vec4f32_fn add_vec4f32
Definition vl_simd.h:385
vl_simd_fma_vec4f32_fn fma_vec4f32
Definition vl_simd.h:389
vl_simd_div_vec4f32_fn div_vec4f32
Definition vl_simd.h:388
vl_simd_vec4_i32
Definition vl_simd.h:251
vl_simd_bitwise_vec4f32_fn xor_vec4f32
Definition vl_simd.h:410
vl_simd_xor_vec8f32_fn xor_vec8f32
Definition vl_simd.h:403
vl_simd_store_vec32u8_fn store_vec32u8
Definition vl_simd.h:425
vl_simd_vec8_i16
Definition vl_simd.h:276
vl_simd_vec32_u8
Definition vl_simd.h:300
vl_simd_add_vec8i16_fn add_vec8i16
Definition vl_simd.h:423
vl_simd_mul_vec4i32_fn mul_vec4i32
Definition vl_simd.h:420
vl_simd_splat_vec8f32_fn splat_vec8f32
Definition vl_simd.h:396
vl_simd_hsum_vec4f32_fn hsum_vec4f32
Definition vl_simd.h:390
vl_simd_vec8_f32
Definition vl_simd.h:236
vl_simd_mul_vec4f32_fn mul_vec4f32
Definition vl_simd.h:387
vl_simd_gt_vec8f32_fn gt_vec8f32
Definition vl_simd.h:399
vl_simd_extract_lane_vec4f32_fn extract_lane_vec4f32
Definition vl_simd.h:415
vl_simd_cmp_vec4f32_fn lt_vec4f32
Definition vl_simd.h:405
vl_simd_not_vec8f32_fn not_vec8f32
Definition vl_simd.h:404