first commit

This commit is contained in:
Ayxan
2022-05-23 00:16:32 +04:00
commit d660f2a4ca
24786 changed files with 4428337 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Calls the float32 vmaxnmq/vminnmq/vrndq intrinsics (plus their float64
 * variants when __aarch64__ is defined) on constant vectors and returns the
 * sum of the lane-0 results, each truncated to int.
 */
int main(void)
{
    float32x4_t one_f32 = vdupq_n_f32(1.0f);
    float32x4_t two_f32 = vdupq_n_f32(2.0f);
    int total;
    /* MAXMIN */
    total = (int)vgetq_lane_f32(vmaxnmq_f32(one_f32, two_f32), 0);
    total += (int)vgetq_lane_f32(vminnmq_f32(one_f32, two_f32), 0);
    /* ROUNDING */
    total += (int)vgetq_lane_f32(vrndq_f32(one_f32), 0);
#ifdef __aarch64__
    {
        float64x2_t one_f64 = vdupq_n_f64(1.0);
        float64x2_t two_f64 = vdupq_n_f64(2.0);
        /* MAXMIN */
        total += (int)vgetq_lane_f64(vmaxnmq_f64(one_f64, two_f64), 0);
        total += (int)vgetq_lane_f64(vminnmq_f64(one_f64, two_f64), 0);
        /* ROUNDING */
        total += (int)vgetq_lane_f64(vrndq_f64(one_f64), 0);
    }
#endif
    return total;
}

View File

@@ -0,0 +1,15 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Calls vdotq_u32 (and vdotq_laneq_u32 when __aarch64__ is defined) on
 * constant vectors and returns the sum of the lane-0 results.
 */
int main(void)
{
    uint8x16_t bytes_a = vdupq_n_u8((unsigned char)1);
    uint8x16_t bytes_b = vdupq_n_u8((unsigned char)2);
    uint32x4_t acc = vdupq_n_u32(3);
    uint32x4_t dot = vdotq_u32(acc, bytes_a, bytes_b);
    int result = (int)vgetq_lane_u32(dot, 0);
#ifdef __aarch64__
    result += (int)vgetq_lane_u32(vdotq_laneq_u32(acc, bytes_a, bytes_b, 0), 0);
#endif
    return result;
}

View File

@@ -0,0 +1,17 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Calls vfmlal_low_u32 on 64-bit vectors and vfmlslq_high_u32 on 128-bit
 * vectors (all lanes set to 1) and returns the sum of both lane-0 results,
 * each truncated to int.
 * NOTE(review): ACLE appears to spell these vfmlal_low_f16 and
 * vfmlslq_high_f16; confirm which toolchains accept the _u32 spellings.
 */
int main(void)
{
    float16x8_t half_q = vdupq_n_f16((float16_t)1);
    float16x4_t half_d = vdup_n_f16((float16_t)1);
    float32x4_t single_q = vdupq_n_f32(1.0f);
    float32x2_t single_d = vdup_n_f32(1.0f);
    float32x2_t fused_low = vfmlal_low_u32(single_d, half_d, half_d);
    float32x4_t fused_high = vfmlslq_high_u32(single_q, half_q, half_q);
    int result = (int)vget_lane_f32(fused_low, 0);
    result += (int)vgetq_lane_f32(fused_high, 0);
    return result;
}

View File

@@ -0,0 +1,14 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Calls the half-precision absolute-difference intrinsics vabdq_f16 and
 * vabd_f16 on vectors filled with -1 and returns the sum of both lane-0
 * results, each truncated to int.
 */
int main(void)
{
    float16x8_t half_q = vdupq_n_f16((float16_t)-1);
    float16x4_t half_d = vdup_n_f16((float16_t)-1);
    float16x8_t diff_q = vabdq_f16(half_q, half_q);
    float16x4_t diff_d = vabd_f16(half_d, half_d);
    int result = (int)vgetq_lane_f16(diff_q, 0);
    result += (int)vget_lane_f16(diff_d, 0);
    return result;
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __AVX__
#error "HOST/ARCH doesn't support AVX"
#endif
#endif
#include <immintrin.h>
/*
 * Adds two 256-bit float vectors loaded from argv[argc-1] and argv[1] with
 * _mm256_add_ps and returns the lowest lane truncated to int.
 * NOTE(review): operands presumably come from argv so the intrinsic cannot be
 * constant-folded away - confirm.
 */
int main(int argc, char **argv)
{
    __m256 lhs = _mm256_loadu_ps((const float*)argv[argc-1]);
    __m256 rhs = _mm256_loadu_ps((const float*)argv[1]);
    __m256 sum = _mm256_add_ps(lhs, rhs);
    __m128 low = _mm256_castps256_ps128(sum);
    return (int)_mm_cvtss_f32(low);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __AVX2__
#error "HOST/ARCH doesn't support AVX2"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 256 bits from argv[argc-1], applies _mm256_abs_epi16 and returns the
 * lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m256i input = _mm256_loadu_si256((const __m256i*)argv[argc-1]);
    __m256i magnitude = _mm256_abs_epi16(input);
    __m128i low = _mm256_castsi256_si128(magnitude);
    return _mm_cvtsi128_si32(low);
}

View File

@@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __AVX512VNNI__
#error "HOST/ARCH doesn't support CascadeLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1], runs the VNNI intrinsic
 * _mm512_dpbusd_epi32 on it and returns the lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512i zero = _mm512_setzero_si512();
    /* VNNI */
    __m512i dot = _mm512_dpbusd_epi32(input, zero, input);
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(dot));
}

View File

@@ -0,0 +1,24 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__AVX512VBMI__) || !defined(__AVX512IFMA__)
#error "HOST/ARCH doesn't support CannonLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1], chains one IFMA intrinsic and one VBMI
 * intrinsic over it and returns the lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    /* IFMA */
    __m512i ifma = _mm512_madd52hi_epu64(input, input, _mm512_setzero_si512());
    /* VBMI */
    __m512i vbmi = _mm512_permutex2var_epi8(ifma, _mm512_setzero_si512(), ifma);
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(vbmi));
}

View File

@@ -0,0 +1,26 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
/*
 * Require every feature macro whose intrinsics are exercised by the program
 * below: VBMI2 (_mm512_shrdv_epi64), BITALG (_mm512_popcnt_epi8) and
 * VPOPCNTDQ (_mm512_popcnt_epi64). The previous condition tested
 * __AVX512VPOPCNTDQ__ twice and never checked VBMI2.
 */
#if !defined(__AVX512VBMI2__) || !defined(__AVX512BITALG__) || !defined(__AVX512VPOPCNTDQ__)
#error "HOST/ARCH doesn't support IceLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1], chains one VBMI2, one BITALG and one
 * VPOPCNTDQ intrinsic over it and returns the lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    /* VBMI2 */
    __m512i shifted = _mm512_shrdv_epi64(input, input, _mm512_setzero_si512());
    /* BITALG */
    __m512i byte_counts = _mm512_popcnt_epi8(shifted);
    /* VPOPCNTDQ */
    __m512i qword_counts = _mm512_popcnt_epi64(byte_counts);
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(qword_counts));
}

View File

@@ -0,0 +1,25 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__AVX512ER__) || !defined(__AVX512PF__)
#error "HOST/ARCH doesn't support Knights Landing AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits of doubles from argv[argc-1], runs one AVX512ER intrinsic
 * and one AVX512PF intrinsic, and returns base[0].
 */
int main(int argc, char **argv)
{
    /*
     * Zero-initialised so that the value returned below is well defined:
     * this function never stores to base[] itself, and the prefetch
     * intrinsic is not relied upon to write to it.
     */
    int base[128] = {0};
    __m512d ad = _mm512_loadu_pd((const __m512d*)argv[argc-1]);
    /* ER */
    __m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(ad));
    /* PF */
    _mm512_mask_prefetch_i64scatter_pd(base, _mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1);
    return base[0];
}

View File

@@ -0,0 +1,30 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__AVX5124FMAPS__) || !defined(__AVX5124VNNIW__) || !defined(__AVX512VPOPCNTDQ__)
#error "HOST/ARCH doesn't support Knights Mill AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads an integer vector from argv[argc-1] and a float vector from
 * argv[argc-2], runs one 4FMAPS, one 4VNNIW and one VPOPCNTDQ intrinsic,
 * combines the results and returns the lowest 32 bits.
 */
int main(int argc, char **argv)
{
    __m512i ints = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512 floats = _mm512_loadu_ps((const __m512*)argv[argc-2]);
    /* 4FMAPS */
    __m512 fma4 = _mm512_4fmadd_ps(floats, floats, floats, floats, floats, NULL);
    /* 4VNNIW */
    __m512i vnni4 = _mm512_4dpwssd_epi32(ints, ints, ints, ints, ints, NULL);
    /* VPOPCNTDQ */
    __m512i counts = _mm512_popcnt_epi64(vnni4);
    __m512i combined = _mm512_add_epi32(counts, _mm512_castps_si512(fma4));
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(combined));
}

View File

@@ -0,0 +1,26 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__AVX512VL__) || !defined(__AVX512BW__) || !defined(__AVX512DQ__)
#error "HOST/ARCH doesn't support SkyLake AVX512 features"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1] and chains AVX512F, VL, DQ and BW
 * intrinsics over it, returning the lowest 32 bits of the final vector.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512i abs32 = _mm512_abs_epi32(input);
    /* VL */
    __m256i upper_half = _mm512_extracti64x4_epi64(abs32, 1);
    __m256i abs64 = _mm256_abs_epi64(upper_half);
    /* DQ */
    __m512i widened = _mm512_broadcast_i32x8(abs64);
    /* BW */
    __m512i abs16 = _mm512_abs_epi16(widened);
    return _mm_cvtsi128_si32(_mm512_castsi512_si128(abs16));
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __AVX512CD__
#error "HOST/ARCH doesn't support AVX512CD"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1], applies _mm512_lzcnt_epi32 and returns
 * the lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512i leading_zeros = _mm512_lzcnt_epi32(input);
    __m128i low = _mm512_castsi512_si128(leading_zeros);
    return _mm_cvtsi128_si32(low);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __AVX512F__
#error "HOST/ARCH doesn't support AVX512F"
#endif
#endif
#include <immintrin.h>
/*
 * Loads 512 bits from argv[argc-1], applies _mm512_abs_epi32 and returns the
 * lowest 32 bits of the result.
 */
int main(int argc, char **argv)
{
    __m512i input = _mm512_loadu_si512((const __m512i*)argv[argc-1]);
    __m512i magnitude = _mm512_abs_epi32(input);
    __m128i low = _mm512_castsi512_si128(magnitude);
    return _mm_cvtsi128_si32(low);
}

View File

@@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __F16C__
#error "HOST/ARCH doesn't support F16C"
#endif
#endif
#include <emmintrin.h>
#include <immintrin.h>
/*
 * Converts half-precision data loaded from argv[argc-1] (128-bit result) and
 * argv[argc-2] (256-bit result) to single precision and returns the sum of
 * both lowest lanes truncated to int.
 */
int main(int argc, char **argv)
{
    __m128i halves_a = _mm_loadu_si128((const __m128i*)argv[argc-1]);
    __m128i halves_b = _mm_loadu_si128((const __m128i*)argv[argc-2]);
    __m128 singles4 = _mm_cvtph_ps(halves_a);
    __m256 singles8 = _mm256_cvtph_ps(halves_b);
    return (int)(_mm_cvtss_f32(singles4) + _mm_cvtss_f32(_mm256_castps256_ps128(singles8)));
}

View File

@@ -0,0 +1,22 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__FMA__) && !defined(__AVX2__)
#error "HOST/ARCH doesn't support FMA3"
#endif
#endif
#include <xmmintrin.h>
#include <immintrin.h>
/*
 * Loads eight floats from argv[argc-1], computes x*x+x with
 * _mm256_fmadd_ps and returns the lowest lane truncated to int.
 */
int main(int argc, char **argv)
{
    __m256 x = _mm256_loadu_ps((const float*)argv[argc-1]);
    __m256 fused = _mm256_fmadd_ps(x, x, x);
    __m128 low = _mm256_castps256_ps128(fused);
    return (int)_mm_cvtss_f32(low);
}

View File

@@ -0,0 +1,13 @@
#include <immintrin.h>
#ifdef _MSC_VER
#include <ammintrin.h>
#else
#include <x86intrin.h>
#endif
int main(int argc, char **argv)
{
__m256 a = _mm256_loadu_ps((const float*)argv[argc-1]);
a = _mm256_macc_ps(a, a, a);
return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
}

View File

@@ -0,0 +1,15 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Multiplies constant float32 vectors with vmulq_f32 (and float64 vectors
 * with vmulq_f64 when __aarch64__ is defined) and returns the sum of the
 * lane-0 products, each truncated to int.
 */
int main(void)
{
    float32x4_t one_f32 = vdupq_n_f32(1.0f);
    float32x4_t two_f32 = vdupq_n_f32(2.0f);
    float32x4_t product_f32 = vmulq_f32(one_f32, two_f32);
    int result = (int)vgetq_lane_f32(product_f32, 0);
#ifdef __aarch64__
    float64x2_t one_f64 = vdupq_n_f64(1.0);
    float64x2_t two_f64 = vdupq_n_f64(2.0);
    result += (int)vgetq_lane_f64(vmulq_f64(one_f64, two_f64), 0);
#endif
    return result;
}

View File

@@ -0,0 +1,11 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Loads four zeroed 16-bit values, reinterprets them as a half-precision
 * vector via a vector cast, widens them with vcvt_f32_f16 and returns lane 0
 * truncated to int.
 */
int main(void)
{
    short zeros[] = {0, 0, 0, 0, 0, 0, 0, 0};
    int16x4_t raw = vld1_s16((const short*)zeros);
    float32x4_t widened = vcvt_f32_f16((float16x4_t)raw);
    return (int)vgetq_lane_f32(widened, 0);
}

View File

@@ -0,0 +1,19 @@
#ifdef _MSC_VER
#include <Intrin.h>
#endif
#include <arm_neon.h>
/*
 * Calls vfmaq_f32 (and vfmaq_f64 when __aarch64__ is defined) on constant
 * vectors and returns the sum of the lane-0 results, each truncated to int.
 */
int main(void)
{
    float32x4_t acc_f32 = vdupq_n_f32(1.0f);
    float32x4_t lhs_f32 = vdupq_n_f32(2.0f);
    float32x4_t rhs_f32 = vdupq_n_f32(3.0f);
    float32x4_t fused_f32 = vfmaq_f32(acc_f32, lhs_f32, rhs_f32);
    int result = (int)vgetq_lane_f32(fused_f32, 0);
#ifdef __aarch64__
    float64x2_t acc_f64 = vdupq_n_f64(1.0);
    float64x2_t lhs_f64 = vdupq_n_f64(2.0);
    float64x2_t rhs_f64 = vdupq_n_f64(3.0);
    result += (int)vgetq_lane_f64(vfmaq_f64(acc_f64, lhs_f64, rhs_f64), 0);
#endif
    return result;
}

View File

@@ -0,0 +1,32 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#if !defined(__SSE4_2__) && !defined(__POPCNT__)
#error "HOST/ARCH doesn't support POPCNT"
#endif
#endif
#ifdef _MSC_VER
#include <nmmintrin.h>
#else
#include <popcntintrin.h>
#endif
/*
 * Reads a 64-bit and a 32-bit operand through argv pointers, replaces each
 * with its population count (the 64-bit count only on x86-64 targets) and
 * returns their sum.
 */
int main(int argc, char **argv)
{
    // Operands are read from memory so that popcnt instructions are really
    // generated and checked by the assembler
    unsigned long long wide = *((unsigned long long*)argv[argc-1]);
    unsigned int narrow = *((unsigned int*)argv[argc-2]);
#if defined(_M_X64) || defined(__x86_64__)
    wide = _mm_popcnt_u64(wide);
#endif
    narrow = _mm_popcnt_u32(narrow);
    return (int)wide + narrow;
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSE__
#error "HOST/ARCH doesn't support SSE"
#endif
#endif
#include <xmmintrin.h>
/* Adds two zero vectors with _mm_add_ps and returns lane 0 truncated to int. */
int main(void)
{
    __m128 lhs = _mm_setzero_ps();
    __m128 rhs = _mm_setzero_ps();
    __m128 sum = _mm_add_ps(lhs, rhs);
    return (int)_mm_cvtss_f32(sum);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSE2__
#error "HOST/ARCH doesn't support SSE2"
#endif
#endif
#include <emmintrin.h>
/* Adds two zero vectors with _mm_add_epi16 and returns the lowest 32 bits. */
int main(void)
{
    __m128i lhs = _mm_setzero_si128();
    __m128i rhs = _mm_setzero_si128();
    __m128i sum = _mm_add_epi16(lhs, rhs);
    return _mm_cvtsi128_si32(sum);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSE3__
#error "HOST/ARCH doesn't support SSE3"
#endif
#endif
#include <pmmintrin.h>
/* Horizontally adds two zero vectors with _mm_hadd_ps and returns lane 0 truncated to int. */
int main(void)
{
    __m128 lhs = _mm_setzero_ps();
    __m128 rhs = _mm_setzero_ps();
    __m128 pairwise = _mm_hadd_ps(lhs, rhs);
    return (int)_mm_cvtss_f32(pairwise);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSE4_1__
#error "HOST/ARCH doesn't support SSE41"
#endif
#endif
#include <smmintrin.h>
/* Applies _mm_floor_ps to a zero vector and returns lane 0 truncated to int. */
int main(void)
{
    __m128 zero = _mm_setzero_ps();
    __m128 floored = _mm_floor_ps(zero);
    return (int)_mm_cvtss_f32(floored);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSE4_2__
#error "HOST/ARCH doesn't support SSE42"
#endif
#endif
#include <smmintrin.h>
#include <nmmintrin.h>
/*
 * Exercises an SSE4.2 intrinsic so that the probe fails to build when SSE4.2
 * intrinsics are unavailable. The previous body only called _mm_hadd_ps,
 * which is an SSE3 intrinsic and therefore did not test SSE4.2 at all.
 * Returns the lowest 32 bits of the comparison; 0 > 0 is false, so the
 * result is 0, the same value the previous body returned.
 */
int main(void)
{
    __m128i zero = _mm_setzero_si128();
    /* 64-bit signed greater-than compare (PCMPGTQ) was introduced with SSE4.2. */
    __m128i greater = _mm_cmpgt_epi64(zero, zero);
    return _mm_cvtsi128_si32(greater);
}

View File

@@ -0,0 +1,20 @@
#if defined(DETECT_FEATURES) && defined(__INTEL_COMPILER)
/*
* Unlike GCC and CLANG, Intel Compiler exposes all supported intrinsics,
* whether or not the build options for those features are specified.
* Therefore, we must test #definitions of CPU features when option native/host
* is enabled via `--cpu-baseline` or through env var `CFLAGS` otherwise
* the test will be broken and leads to enable all possible features.
*/
#ifndef __SSSE3__
#error "HOST/ARCH doesn't support SSSE3"
#endif
#endif
#include <tmmintrin.h>
/* Horizontally adds two zero vectors with _mm_hadd_epi16 and returns the lowest 32 bits. */
int main(void)
{
    __m128i lhs = _mm_setzero_si128();
    __m128i rhs = _mm_setzero_si128();
    __m128i pairwise = _mm_hadd_epi16(lhs, rhs);
    return (int)_mm_cvtsi128_si32(pairwise);
}

View File

@@ -0,0 +1,21 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
#define vsx_ld vec_vsx_ld
#define vsx_st vec_vsx_st
#else
#define vsx_ld vec_xl
#define vsx_st vec_xst
#endif
/*
 * Loads four zeroed unsigned ints through the vsx_ld macro, writes them back
 * through vsx_st and returns the first stored element.
 */
int main(void)
{
    unsigned int stored[4];
    unsigned int zeros[] = {0, 0, 0, 0};
    __vector unsigned int loaded = vsx_ld(0, zeros);
    vsx_st(loaded, 0, stored);
    return stored[0];
}

View File

@@ -0,0 +1,13 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector unsigned long long v_uint64x2;
/*
 * Compares a zeroed 64-bit-lane vector with itself using vec_cmpeq and
 * returns element 0 of the comparison result converted to int.
 */
int main(void)
{
    v_uint64x2 zeros = (v_uint64x2){0, 0};
    v_uint64x2 equal_mask = (v_uint64x2)vec_cmpeq(zeros, zeros);
    return (int)vec_extract(equal_mask, 0);
}

View File

@@ -0,0 +1,13 @@
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
typedef __vector unsigned int v_uint32x4;
/*
 * Takes the absolute difference of a zeroed 32-bit-lane vector with itself
 * using vec_absd and returns element 0 converted to int.
 */
int main(void)
{
    v_uint32x4 zeros = (v_uint32x4){0, 0, 0, 0};
    v_uint32x4 abs_diff = vec_absd(zeros, zeros);
    return (int)vec_extract(abs_diff, 0);
}

View File

@@ -0,0 +1,12 @@
#include <immintrin.h>
#ifdef _MSC_VER
#include <ammintrin.h>
#else
#include <x86intrin.h>
#endif
/* Compares two zero vectors with _mm_comge_epu32 and returns the lowest 32 bits. */
int main(void)
{
    __m128i lhs = _mm_setzero_si128();
    __m128i rhs = _mm_setzero_si128();
    __m128i ge_mask = _mm_comge_epu32(lhs, rhs);
    return _mm_cvtsi128_si32(ge_mask);
}

View File

@@ -0,0 +1,18 @@
#include <immintrin.h>
/**
* Test BW mask operations due to:
* - MSVC has supported it since vs2019 see,
* https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
* - Clang >= v8.0
* - GCC >= v7.1
*/
/*
 * Builds a 64-bit mask from a byte comparison and passes it through the
 * mask OR, XOR, convert and unpack intrinsics, returning the final mask
 * converted to int.
 */
int main(void)
{
    __m512i lhs = _mm512_set1_epi8((char)1);
    __m512i rhs = _mm512_set1_epi8((char)1);
    __mmask64 mask = _mm512_cmpeq_epi8_mask(lhs, rhs);
    mask = _kor_mask64(mask, mask);
    mask = _kxor_mask64(mask, mask);
    mask = _cvtu64_mask64(_cvtmask64_u64(mask));
    mask = _mm512_kunpackd(mask, mask);
    mask = (__mmask64)_mm512_kunpackw((__mmask32)mask, (__mmask32)mask);
    return (int)_cvtmask64_u64(mask);
}

View File

@@ -0,0 +1,16 @@
#include <immintrin.h>
/**
* Test DQ mask operations due to:
* - MSVC has supported it since vs2019 see,
* https://developercommunity.visualstudio.com/content/problem/518298/missing-avx512bw-mask-intrinsics.html
* - Clang >= v8.0
* - GCC >= v7.1
*/
/*
 * Builds an 8-bit mask from a 64-bit-lane comparison and passes it through
 * the mask OR, XOR and convert intrinsics, returning the final mask
 * converted to int.
 */
int main(void)
{
    __m512i lhs = _mm512_set1_epi64(1);
    __m512i rhs = _mm512_set1_epi64(1);
    __mmask8 mask = _mm512_cmpeq_epi64_mask(lhs, rhs);
    mask = _kor_mask8(mask, mask);
    mask = _kxor_mask8(mask, mask);
    mask = _cvtu32_mask8(_cvtmask8_u32(mask));
    return (int)_cvtmask8_u32(mask);
}

View File

@@ -0,0 +1,41 @@
#include <immintrin.h>
/**
* The following intrinsics don't have direct native support but compilers
* tend to emulate them.
* They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19
*/
/*
 * Calls each _mm512_reduce_{add,mul,min,max,and,or} intrinsic used below on
 * constant vectors and returns the sum of all results, with the float and
 * double accumulators truncated to int at the end.
 */
int main(void)
{
    __m512 ones_f32 = _mm512_set1_ps(1.0f);
    __m512d ones_f64 = _mm512_set1_pd(1.0);
    __m512i ones_int = _mm512_set1_epi64(1);
    // add
    float acc_f32 = _mm512_reduce_add_ps(ones_f32);
    double acc_f64 = _mm512_reduce_add_pd(ones_f64);
    int acc_int = (int)_mm512_reduce_add_epi64(ones_int);
    acc_int += (int)_mm512_reduce_add_epi32(ones_int);
    // mul
    acc_f32 += _mm512_reduce_mul_ps(ones_f32);
    acc_f64 += _mm512_reduce_mul_pd(ones_f64);
    acc_int += (int)_mm512_reduce_mul_epi64(ones_int);
    acc_int += (int)_mm512_reduce_mul_epi32(ones_int);
    // min
    acc_f32 += _mm512_reduce_min_ps(ones_f32);
    acc_f64 += _mm512_reduce_min_pd(ones_f64);
    acc_int += (int)_mm512_reduce_min_epi32(ones_int);
    acc_int += (int)_mm512_reduce_min_epu32(ones_int);
    acc_int += (int)_mm512_reduce_min_epi64(ones_int);
    // max
    acc_f32 += _mm512_reduce_max_ps(ones_f32);
    acc_f64 += _mm512_reduce_max_pd(ones_f64);
    acc_int += (int)_mm512_reduce_max_epi32(ones_int);
    acc_int += (int)_mm512_reduce_max_epu32(ones_int);
    acc_int += (int)_mm512_reduce_max_epi64(ones_int);
    // and
    acc_int += (int)_mm512_reduce_and_epi32(ones_int);
    acc_int += (int)_mm512_reduce_and_epi64(ones_int);
    // or
    acc_int += (int)_mm512_reduce_or_epi32(ones_int);
    acc_int += (int)_mm512_reduce_or_epi64(ones_int);
    return (int)acc_f32 + (int)acc_f64 + acc_int;
}

View File

@@ -0,0 +1,36 @@
/**
* Testing ASM VSX register number fixer '%x<n>'
*
* old versions of CLANG don't support %x<n> in the inline asm template
* which fixes register number when using any of the register constraints wa, wd, wf.
*
* xref:
* - https://bugs.llvm.org/show_bug.cgi?id=31837
* - https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
*/
#ifndef __VSX__
#error "VSX is not supported"
#endif
#include <altivec.h>
#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
#define vsx_ld vec_vsx_ld
#define vsx_st vec_vsx_st
#else
#define vsx_ld vec_xl
#define vsx_st vec_xst
#endif
/*
 * Checks that the '%x<n>' operand modifier is accepted in an inline asm
 * template with the "wa" constraint. Returns the first element of the
 * converted vector, which is 0 for the all-zero input.
 */
int main(void)
{
    float z4[] = {0, 0, 0, 0};
    signed int zout[] = {0, 0, 0, 0};
    __vector float vz4 = vsx_ld(0, z4);
    __vector signed int asm_ret = vsx_ld(0, zout);
    /*
     * xvcvspsxws converts single-precision floats to signed words, so the
     * float vector is the input and the integer vector receives the result.
     * The operands were previously swapped, which left the asm output unused
     * (nothing read vz4 afterwards); now the stored value really comes from
     * the asm statement.
     */
    __asm__ ("xvcvspsxws %x0,%x1" : "=wa" (asm_ret) : "wa" (vz4));
    vsx_st(asm_ret, 0, zout);
    return zout[0];
}

View File

@@ -0,0 +1 @@
/*
 * File-scope int with no initializer; it is the only declaration in this
 * translation unit.
 * NOTE(review): presumably a minimal source file used to check whether the
 * compiler accepts a given set of flags - confirm against the build scripts.
 */
int test_flags;