12 memcpy(result.components, ptr,
sizeof(result.components));
18 memcpy(ptr, v.components,
sizeof(v.components));
24 for (
int i = 0; i < 4; i++)
25 result.components[i] = scalar;
32 for (
int i = 0; i < 4; i++)
33 result.components[i] = a.components[i] + b.components[i];
40 for (
int i = 0; i < 4; i++)
41 result.components[i] = a.components[i] - b.components[i];
48 for (
int i = 0; i < 4; i++)
49 result.components[i] = a.components[i] * b.components[i];
56 for (
int i = 0; i < 4; i++)
57 result.components[i] = a.components[i] / b.components[i];
64 for (
int i = 0; i < 4; i++)
65 result.components[i] = a.components[i] * b.components[i] + c.components[i];
71 return v.components[0] + v.components[1] + v.components[2] + v.components[3];
77 memcpy(result.components, ptr,
sizeof(result.components));
83 memcpy(ptr, v.components,
sizeof(v.components));
89 for (
int i = 0; i < 8; i++)
90 result.components[i] = a.components[i] + b.components[i];
97 for (
int i = 0; i < 8; i++)
98 result.components[i] = a.components[i] * b.components[i];
105 for (
int i = 0; i < 8; i++)
106 result.components[i] = a.components[i] * b.components[i] + c.components[i];
114 for (
int i = 0; i < 4; i++)
116 vl_uint32_t mask = cmp_fn(a.components[i], b.components[i]) ? 0xFFFFFFFFu : 0u;
117 memcpy(&result.components[i], &mask,
sizeof(
vl_float32_t));
128 return vlSIMDCmpPortable(a, b, vlSIMDLtCmp);
133 return vlSIMDCmpPortable(a, b, vlSIMDGtCmp);
138 return vlSIMDCmpPortable(a, b, vlSIMDEqCmp);
144 for (
int i = 0; i < 4; i++)
146 vl_uint32_t ua, ub, ur;
147 memcpy(&ua, &a.components[i],
sizeof(ua));
148 memcpy(&ub, &b.components[i],
sizeof(ub));
150 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
158 for (
int i = 0; i < 4; i++)
160 vl_uint32_t ua, ub, ur;
161 memcpy(&ua, &a.components[i],
sizeof(ua));
162 memcpy(&ub, &b.components[i],
sizeof(ub));
164 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
172 for (
int i = 0; i < 4; i++)
174 vl_uint32_t ua, ub, ur;
175 memcpy(&ua, &a.components[i],
sizeof(ua));
176 memcpy(&ub, &b.components[i],
sizeof(ub));
178 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
186 for (
int i = 0; i < 4; i++)
189 memcpy(&ua, &a.components[i],
sizeof(ua));
191 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
200 for (
int i = 1; i < 4; i++)
202 if (v.components[i] > max_val)
203 max_val = v.components[i];
211 for (
int i = 1; i < 4; i++)
213 if (v.components[i] < min_val)
214 min_val = v.components[i];
221 return v.components[0] * v.components[1] * v.components[2] * v.components[3];
227 return v.components[lane & 3];
233 return vlSIMDSplatVec4F32Portable(val);
240 result.components[0] = ptr[0];
241 result.components[1] = ptr[1];
242 result.components[2] = ptr[2];
243 result.components[3] = ptr[3];
249 ptr[0] = v.components[0];
250 ptr[1] = v.components[1];
251 ptr[2] = v.components[2];
252 ptr[3] = v.components[3];
258 for (
int i = 0; i < 4; i++)
260 result.components[i] = a.components[i] + b.components[i];
268 for (
int i = 0; i < 4; i++)
270 result.components[i] = a.components[i] * b.components[i];
278 for (
int i = 0; i < 8; i++)
279 result.components[i] = scalar;
286 for (
int i = 0; i < 8; i++)
287 result.components[i] = a.components[i] - b.components[i];
294 for (
int i = 0; i < 8; i++)
296 vl_uint32_t mask = a.components[i] < b.components[i] ? 0xFFFFFFFFu : 0u;
297 memcpy(&result.components[i], &mask,
sizeof(
vl_float32_t));
305 for (
int i = 0; i < 8; i++)
307 vl_uint32_t mask = a.components[i] > b.components[i] ? 0xFFFFFFFFu : 0u;
308 memcpy(&result.components[i], &mask,
sizeof(
vl_float32_t));
316 for (
int i = 0; i < 8; i++)
318 vl_uint32_t mask = a.components[i] == b.components[i] ? 0xFFFFFFFFu : 0u;
319 memcpy(&result.components[i], &mask,
sizeof(
vl_float32_t));
327 for (
int i = 0; i < 8; i++)
329 vl_uint32_t ua, ub, ur;
330 memcpy(&ua, &a.components[i],
sizeof(ua));
331 memcpy(&ub, &b.components[i],
sizeof(ub));
333 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
341 for (
int i = 0; i < 8; i++)
343 vl_uint32_t ua, ub, ur;
344 memcpy(&ua, &a.components[i],
sizeof(ua));
345 memcpy(&ub, &b.components[i],
sizeof(ub));
347 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
355 for (
int i = 0; i < 8; i++)
357 vl_uint32_t ua, ub, ur;
358 memcpy(&ua, &a.components[i],
sizeof(ua));
359 memcpy(&ub, &b.components[i],
sizeof(ub));
361 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
369 for (
int i = 0; i < 8; i++)
372 memcpy(&ua, &a.components[i],
sizeof(ua));
374 memcpy(&result.components[i], &ur,
sizeof(result.components[i]));
382 for (
int i = 0; i < 8; i++)
384 result.components[i] = ptr[i];
391 for (
int i = 0; i < 8; i++)
393 ptr[i] = v.components[i];
400 for (
int i = 0; i < 8; i++)
402 result.components[i] = a.components[i] + b.components[i];
411 for (
int i = 0; i < 32; i++)
413 result.components[i] = ptr[i];
420 for (
int i = 0; i < 32; i++)
422 ptr[i] = v.components[i];
426static void vlSIMDInitPortable(
void)
VL_F32_T vl_float32_t
32-bit floating point number type.
Definition vl_numtypes.h:173
vl_simd_functions_t vlSIMDFunctions
Global SIMD function table.
Definition vl_simd.c:34
Transparent runtime-selected SIMD abstraction layer.
vl_simd_hmin_vec4f32_fn hmin_vec4f32
Definition vl_simd.h:413
vl_simd_bitwise_vec4f32_fn or_vec4f32
Definition vl_simd.h:409
vl_simd_lt_vec8f32_fn lt_vec8f32
Definition vl_simd.h:398
vl_simd_or_vec8f32_fn or_vec8f32
Definition vl_simd.h:402
vl_simd_hprod_vec4f32_fn hprod_vec4f32
Definition vl_simd.h:414
vl_simd_cmp_vec4f32_fn eq_vec4f32
Definition vl_simd.h:407
vl_simd_bitwise_vec4f32_fn and_vec4f32
Definition vl_simd.h:408
vl_simd_not_vec4f32_fn not_vec4f32
Definition vl_simd.h:411
vl_simd_store_vec4i32_fn store_vec4i32
Definition vl_simd.h:418
vl_simd_fma_vec8f32_fn fma_vec8f32
Definition vl_simd.h:395
vl_simd_eq_vec8f32_fn eq_vec8f32
Definition vl_simd.h:400
vl_simd_store_vec8i16_fn store_vec8i16
Definition vl_simd.h:422
vl_simd_load_vec4f32_fn load_vec4f32
Definition vl_simd.h:382
vl_simd_sub_vec8f32_fn sub_vec8f32
Definition vl_simd.h:397
vl_simd_load_vec8i16_fn load_vec8i16
Definition vl_simd.h:421
vl_simd_load_vec32u8_fn load_vec32u8
Definition vl_simd.h:424
const char * backend_name
Backend name string for logging/debugging (e.g., "AVX2", "NEON64").
Definition vl_simd.h:429
vl_simd_broadcast_lane_vec4f32_fn broadcast_lane_vec4f32
Definition vl_simd.h:416
vl_simd_cmp_vec4f32_fn gt_vec4f32
Definition vl_simd.h:406
vl_simd_mul_vec8f32_fn mul_vec8f32
Definition vl_simd.h:394
vl_simd_store_vec8f32_fn store_vec8f32
Definition vl_simd.h:392
vl_simd_store_vec4f32_fn store_vec4f32
Definition vl_simd.h:383
vl_simd_add_vec8f32_fn add_vec8f32
Definition vl_simd.h:393
vl_simd_vec4_f32
Definition vl_simd.h:223
vl_simd_and_vec8f32_fn and_vec8f32
Definition vl_simd.h:401
vl_simd_splat_vec4f32_fn splat_vec4f32
Definition vl_simd.h:384
vl_simd_load_vec8f32_fn load_vec8f32
Definition vl_simd.h:391
vl_simd_add_vec4i32_fn add_vec4i32
Definition vl_simd.h:419
vl_simd_load_vec4i32_fn load_vec4i32
Definition vl_simd.h:417
vl_simd_sub_vec4f32_fn sub_vec4f32
Definition vl_simd.h:386
vl_simd_hmax_vec4f32_fn hmax_vec4f32
Definition vl_simd.h:412
vl_simd_add_vec4f32_fn add_vec4f32
Definition vl_simd.h:385
vl_simd_fma_vec4f32_fn fma_vec4f32
Definition vl_simd.h:389
vl_simd_div_vec4f32_fn div_vec4f32
Definition vl_simd.h:388
vl_simd_vec4_i32
Definition vl_simd.h:251
vl_simd_bitwise_vec4f32_fn xor_vec4f32
Definition vl_simd.h:410
vl_simd_xor_vec8f32_fn xor_vec8f32
Definition vl_simd.h:403
vl_simd_store_vec32u8_fn store_vec32u8
Definition vl_simd.h:425
vl_simd_vec8_i16
Definition vl_simd.h:276
vl_simd_vec32_u8
Definition vl_simd.h:300
vl_simd_add_vec8i16_fn add_vec8i16
Definition vl_simd.h:423
vl_simd_mul_vec4i32_fn mul_vec4i32
Definition vl_simd.h:420
vl_simd_splat_vec8f32_fn splat_vec8f32
Definition vl_simd.h:396
vl_simd_hsum_vec4f32_fn hsum_vec4f32
Definition vl_simd.h:390
vl_simd_vec8_f32
Definition vl_simd.h:236
vl_simd_mul_vec4f32_fn mul_vec4f32
Definition vl_simd.h:387
vl_simd_gt_vec8f32_fn gt_vec8f32
Definition vl_simd.h:399
vl_simd_extract_lane_vec4f32_fn extract_lane_vec4f32
Definition vl_simd.h:415
vl_simd_cmp_vec4f32_fn lt_vec4f32
Definition vl_simd.h:405
vl_simd_not_vec8f32_fn not_vec8f32
Definition vl_simd.h:404