1851 lines
58 KiB
C++
1851 lines
58 KiB
C++
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
#include <float.h>
|
|
#include <math.h>
|
|
|
|
#include "SSE2NEONBinding.h"
|
|
#include "SSE2NEONTEST.h"
|
|
|
|
// SSE2NEONTEST performs a set of 'unit tests' making sure that each SSE call
|
|
// provides the output we expect. If this fires an assert, then something didn't match up.
|
|
|
|
|
|
#ifdef WIN32
|
|
|
|
#pragma warning(disable:4211)
|
|
|
|
#include <xmmintrin.h>
|
|
#include <emmintrin.h>
|
|
|
|
#else
|
|
|
|
#include "SSE2NEON.h"
|
|
|
|
#endif
|
|
|
|
namespace SSE2NEON
|
|
{
|
|
|
|
// hex representation of an IEEE NAN
|
|
const uint32_t inan = 0xffffffff;
|
|
|
|
static inline float getNAN(void)
|
|
{
|
|
const float *fn = (const float *)&inan;
|
|
return *fn;
|
|
}
|
|
|
|
static inline bool isNAN(float a)
|
|
{
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
return (*ia) == inan ? true : false;
|
|
}
|
|
|
|
// Do a round operation that produces results the same as SSE instructions
|
|
static inline float bankersRounding(float val)
|
|
{
|
|
if (val < 0)
|
|
{
|
|
return -bankersRounding(-val);
|
|
}
|
|
float ret;
|
|
int32_t truncateInteger = int32_t(val);
|
|
int32_t roundInteger = int32_t(val + 0.5f);
|
|
float diff1 = val - float(truncateInteger); // Truncate value
|
|
float diff2 = val - float(roundInteger); // Round up value
|
|
if (diff2 < 0) diff2 *= -1; // get the positive difference from the round up value
|
|
// If it's closest to the truncate integer; then use it
|
|
if (diff1 < diff2)
|
|
{
|
|
ret = float(truncateInteger);
|
|
}
|
|
else if (diff2 < diff1) // if it's closest to the round-up integer; use it
|
|
{
|
|
ret = float(roundInteger);
|
|
}
|
|
else
|
|
{
|
|
// If it's equidistant between rounding up and rounding down, pick the one which is an even number
|
|
if (truncateInteger & 1) // If truncate is odd, then return the rounded integer
|
|
{
|
|
ret = float(roundInteger);
|
|
}
|
|
else
|
|
{
|
|
// If the rounded up value is odd, use return the truncated integer
|
|
ret = float(truncateInteger);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
const char *SSE2NEONTest::getInstructionTestString(InstructionTest test)
|
|
{
|
|
const char *ret = "UNKNOWN!";
|
|
|
|
switch (test)
|
|
{
|
|
case IT_MM_SETZERO_SI128:
|
|
ret = "MM_SETZERO_SI128";
|
|
break;
|
|
case IT_MM_SETZERO_PS:
|
|
ret = "MM_SETZERO_PS";
|
|
break;
|
|
case IT_MM_SET1_PS:
|
|
ret = "MM_SET1_PS";
|
|
break;
|
|
case IT_MM_SET_PS1:
|
|
ret = "MM_SET_PS1";
|
|
break;
|
|
case IT_MM_SET_PS:
|
|
ret = "MM_SET_PS";
|
|
break;
|
|
case IT_MM_SETR_PS:
|
|
ret = "MM_SETR_PS";
|
|
break;
|
|
case IT_MM_SET1_EPI32:
|
|
ret = "MM_SET1_EPI32";
|
|
break;
|
|
case IT_MM_SET_EPI32:
|
|
ret = "MM_SET_EPI32";
|
|
break;
|
|
case IT_MM_STORE_PS:
|
|
ret = "MM_STORE_PS";
|
|
break;
|
|
case IT_MM_STOREU_PS:
|
|
ret = "MM_STOREU_PS";
|
|
break;
|
|
case IT_MM_STORE_SI128:
|
|
ret = "MM_STORE_SI128";
|
|
break;
|
|
case IT_MM_STORE_SS:
|
|
ret = "MM_STORE_SS";
|
|
break;
|
|
case IT_MM_STOREL_EPI64:
|
|
ret = "MM_STOREL_EPI64";
|
|
break;
|
|
case IT_MM_LOAD1_PS:
|
|
ret = "MM_LOAD1_PS";
|
|
break;
|
|
case IT_MM_LOAD_PS:
|
|
ret = "MM_LOAD_PS";
|
|
break;
|
|
case IT_MM_LOADU_PS:
|
|
ret = "MM_LOADU_PS";
|
|
break;
|
|
case IT_MM_LOAD_SS:
|
|
ret = "MM_LOAD_SS";
|
|
break;
|
|
case IT_MM_CMPNEQ_PS:
|
|
ret = "MM_CMPNEQ_PS";
|
|
break;
|
|
case IT_MM_ANDNOT_PS:
|
|
ret = "MM_ANDNOT_PS";
|
|
break;
|
|
case IT_MM_ANDNOT_SI128:
|
|
ret = "MM_ANDNOT_SI128";
|
|
break;
|
|
case IT_MM_AND_SI128:
|
|
ret = "MM_AND_SI128";
|
|
break;
|
|
case IT_MM_AND_PS:
|
|
ret = "MM_AND_PS";
|
|
break;
|
|
case IT_MM_OR_PS:
|
|
ret = "MM_OR_PS";
|
|
break;
|
|
case IT_MM_XOR_PS:
|
|
ret = "MM_XOR_PS";
|
|
break;
|
|
case IT_MM_OR_SI128:
|
|
ret = "MM_OR_SI128";
|
|
break;
|
|
case IT_MM_XOR_SI128:
|
|
ret = "MM_XOR_SI128";
|
|
break;
|
|
case IT_MM_MOVEMASK_PS:
|
|
ret = "MM_MOVEMASK_PS";
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_DEFAULT:
|
|
ret = "MM_SHUFFLE_EPI32_DEFAULT";
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_FUNCTION:
|
|
ret = "MM_SHUFFLE_EPI32_FUNCTION";
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_SPLAT:
|
|
ret = "MM_SHUFFLE_EPI32_SPLAT";
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_SINGLE:
|
|
ret = "MM_SHUFFLE_EPI32_SINGLE";
|
|
break;
|
|
case IT_MM_SHUFFLEHI_EPI16_FUNCTION:
|
|
ret = "MM_SHUFFLEHI_EPI16_FUNCTION";
|
|
break;
|
|
case IT_MM_MOVEMASK_EPI8:
|
|
ret = "MM_MOVEMASK_EPI8";
|
|
break;
|
|
case IT_MM_SUB_PS:
|
|
ret = "MM_SUB_PS";
|
|
break;
|
|
case IT_MM_SUB_EPI32:
|
|
ret = "MM_SUB_EPI32";
|
|
break;
|
|
case IT_MM_ADD_PS:
|
|
ret = "MM_ADD_PS";
|
|
break;
|
|
case IT_MM_ADD_SS:
|
|
ret = "MM_ADD_SS";
|
|
break;
|
|
case IT_MM_ADD_EPI32:
|
|
ret = "MM_ADD_EPI32";
|
|
break;
|
|
case IT_MM_ADD_EPI16:
|
|
ret = "MM_ADD_EPI16";
|
|
break;
|
|
case IT_MM_MULLO_EPI16:
|
|
ret = "MM_MULLO_EPI16";
|
|
break;
|
|
case IT_MM_MULLO_EPI32:
|
|
ret = "MM_MULLO_EPI32";
|
|
break;
|
|
case IT_MM_MUL_PS:
|
|
ret = "MM_MUL_PS";
|
|
break;
|
|
case IT_MM_DIV_PS:
|
|
ret = "MM_DIV_PS";
|
|
break;
|
|
case IT_MM_DIV_SS:
|
|
ret = "MM_DIV_SS";
|
|
break;
|
|
case IT_MM_RCP_PS:
|
|
ret = "MM_RCP_PS";
|
|
break;
|
|
case IT_MM_SQRT_PS:
|
|
ret = "MM_SQRT_PS";
|
|
break;
|
|
case IT_MM_SQRT_SS:
|
|
ret = "MM_SQRT_SS";
|
|
break;
|
|
case IT_MM_RSQRT_PS:
|
|
ret = "MM_RSQRT_PS";
|
|
break;
|
|
case IT_MM_MAX_PS:
|
|
ret = "MM_MAX_PS";
|
|
break;
|
|
case IT_MM_MIN_PS:
|
|
ret = "MM_MIN_PS";
|
|
break;
|
|
case IT_MM_MAX_SS:
|
|
ret = "MM_MAX_SS";
|
|
break;
|
|
case IT_MM_MIN_SS:
|
|
ret = "MM_MIN_SS";
|
|
break;
|
|
case IT_MM_MIN_EPI16:
|
|
ret = "MM_MIN_EPI16";
|
|
break;
|
|
case IT_MM_MAX_EPI32:
|
|
ret = "MM_MAX_EPI32";
|
|
break;
|
|
case IT_MM_MIN_EPI32:
|
|
ret = "MM_MIN_EPI32";
|
|
break;
|
|
case IT_MM_MULHI_EPI16:
|
|
ret = "MM_MULHI_EPI16";
|
|
break;
|
|
case IT_MM_HADD_PS:
|
|
ret = "MM_HADD_PS";
|
|
break;
|
|
case IT_MM_CMPLT_PS:
|
|
ret = "MM_CMPLT_PS";
|
|
break;
|
|
case IT_MM_CMPGT_PS:
|
|
ret = "MM_CMPGT_PS";
|
|
break;
|
|
case IT_MM_CMPGE_PS:
|
|
ret = "MM_CMPGE_PS";
|
|
break;
|
|
case IT_MM_CMPLE_PS:
|
|
ret = "MM_CMPLE_PS";
|
|
break;
|
|
case IT_MM_CMPEQ_PS:
|
|
ret = "MM_CMPEQ_PS";
|
|
break;
|
|
case IT_MM_CMPLT_EPI32:
|
|
ret = "MM_CMPLT_EPI32";
|
|
break;
|
|
case IT_MM_CMPGT_EPI32:
|
|
ret = "MM_CMPGT_EPI32";
|
|
break;
|
|
case IT_MM_CMPORD_PS:
|
|
ret = "MM_CMPORD_PS";
|
|
break;
|
|
case IT_MM_COMILT_SS:
|
|
ret = "MM_COMILT_SS";
|
|
break;
|
|
case IT_MM_COMIGT_SS:
|
|
ret = "MM_COMIGT_SS";
|
|
break;
|
|
case IT_MM_COMILE_SS:
|
|
ret = "MM_COMILE_SS";
|
|
break;
|
|
case IT_MM_COMIGE_SS:
|
|
ret = "MM_COMIGE_SS";
|
|
break;
|
|
case IT_MM_COMIEQ_SS:
|
|
ret = "MM_COMIEQ_SS";
|
|
break;
|
|
case IT_MM_COMINEQ_SS:
|
|
ret = "MM_COMINEQ_SS";
|
|
break;
|
|
case IT_MM_CVTTPS_EPI32:
|
|
ret = "MM_CVTTPS_EPI32";
|
|
break;
|
|
case IT_MM_CVTEPI32_PS:
|
|
ret = "MM_CVTEPI32_PS";
|
|
break;
|
|
case IT_MM_CVTPS_EPI32:
|
|
ret = "MM_CVTPS_EPI32";
|
|
break;
|
|
case IT_MM_CVTSI128_SI32:
|
|
ret = "MM_CVTSI128_SI32";
|
|
break;
|
|
case IT_MM_CVTSI32_SI128:
|
|
ret = "MM_CVTSI32_SI128";
|
|
break;
|
|
case IT_MM_CASTPS_SI128:
|
|
ret = "MM_CASTPS_SI128";
|
|
break;
|
|
case IT_MM_CASTSI128_PS:
|
|
ret = "MM_CASTSI128_PS";
|
|
break;
|
|
case IT_MM_LOAD_SI128:
|
|
ret = "MM_LOAD_SI128";
|
|
break;
|
|
case IT_MM_PACKS_EPI16:
|
|
ret = "MM_PACKS_EPI16";
|
|
break;
|
|
case IT_MM_PACKUS_EPI16:
|
|
ret = "MM_PACKUS_EPI16";
|
|
break;
|
|
case IT_MM_PACKS_EPI32:
|
|
ret = "MM_PACKS_EPI32";
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI8:
|
|
ret = "MM_UNPACKLO_EPI8";
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI16:
|
|
ret = "MM_UNPACKLO_EPI16";
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI32:
|
|
ret = "MM_UNPACKLO_EPI32";
|
|
break;
|
|
case IT_MM_UNPACKLO_PS:
|
|
ret = "MM_UNPACKLO_PS";
|
|
break;
|
|
case IT_MM_UNPACKHI_PS:
|
|
ret = "MM_UNPACKHI_PS";
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI8:
|
|
ret = "MM_UNPACKHI_EPI8";
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI16:
|
|
ret = "MM_UNPACKHI_EPI16";
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI32:
|
|
ret = "MM_UNPACKHI_EPI32";
|
|
break;
|
|
case IT_MM_SFENCE:
|
|
ret = "MM_SFENCE";
|
|
break;
|
|
case IT_MM_STREAM_SI128:
|
|
ret = "MM_STREAM_SI128";
|
|
break;
|
|
case IT_MM_CLFLUSH:
|
|
ret = "MM_CLFLUSH";
|
|
break;
|
|
case IT_MM_SHUFFLE_PS:
|
|
ret = "MM_SHUFFLE_PS";
|
|
break;
|
|
}
|
|
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
#define ASSERT_RETURN(x) if ( !(x) ) return false;
|
|
|
|
static float ranf(void)
|
|
{
|
|
uint32_t ir = rand() & 0x7FFF;
|
|
return (float)ir*(1.0f / 32768.0f);
|
|
}
|
|
|
|
static float ranf(float low, float high)
|
|
{
|
|
return ranf()*(high - low) + low;
|
|
}
|
|
|
|
bool validateInt(__m128i a, int32_t x, int32_t y, int32_t z, int32_t w)
|
|
{
|
|
const int32_t *t = (const int32_t *)&a;
|
|
ASSERT_RETURN(t[3] == x);
|
|
ASSERT_RETURN(t[2] == y);
|
|
ASSERT_RETURN(t[1] == z);
|
|
ASSERT_RETURN(t[0] == w);
|
|
return true;
|
|
}
|
|
|
|
bool validateInt16(__m128i a, int16_t d0, int16_t d1, int16_t d2, int16_t d3, int16_t d4, int16_t d5, int16_t d6, int16_t d7)
|
|
{
|
|
const int16_t *t = (const int16_t *)&a;
|
|
ASSERT_RETURN(t[0] == d0);
|
|
ASSERT_RETURN(t[1] == d1);
|
|
ASSERT_RETURN(t[2] == d2);
|
|
ASSERT_RETURN(t[3] == d3);
|
|
ASSERT_RETURN(t[4] == d4);
|
|
ASSERT_RETURN(t[5] == d5);
|
|
ASSERT_RETURN(t[6] == d6);
|
|
ASSERT_RETURN(t[7] == d7);
|
|
return true;
|
|
}
|
|
|
|
bool validateSingleFloatPair(float a, float b)
|
|
{
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
return (*ia) == (*ib) ? true : false; // We do an integer (binary) compare rather than a floating point compare to take nands and infinities into account as well.
|
|
}
|
|
|
|
bool validateFloat(__m128 a, float x, float y, float z, float w)
|
|
{
|
|
const float *t = (const float *)&a;
|
|
ASSERT_RETURN(validateSingleFloatPair(t[3],x));
|
|
ASSERT_RETURN(validateSingleFloatPair(t[2],y));
|
|
ASSERT_RETURN(validateSingleFloatPair(t[1],z));
|
|
ASSERT_RETURN(validateSingleFloatPair(t[0],w));
|
|
return true;
|
|
}
|
|
|
|
bool validateFloatEpsilon(__m128 a, float x, float y, float z, float w, float epsilon)
|
|
{
|
|
const float *t = (const float *)&a;
|
|
float dx = fabsf(t[3] - x);
|
|
float dy = fabsf(t[2] - y);
|
|
float dz = fabsf(t[1] - z);
|
|
float dw = fabsf(t[0] - w);
|
|
ASSERT_RETURN(dx < epsilon);
|
|
ASSERT_RETURN(dy < epsilon);
|
|
ASSERT_RETURN(dz < epsilon);
|
|
ASSERT_RETURN(dw < epsilon);
|
|
return true;
|
|
}
|
|
|
|
|
|
bool test_mm_setzero_si128(void)
|
|
{
|
|
__m128i a = _mm_setzero_si128();
|
|
return validateInt(a, 0, 0, 0, 0);
|
|
}
|
|
|
|
bool test_mm_setzero_ps(void)
|
|
{
|
|
__m128 a = _mm_setzero_ps();
|
|
return validateFloat(a, 0, 0, 0, 0);
|
|
}
|
|
|
|
bool test_mm_set1_ps(float w)
|
|
{
|
|
__m128 a = _mm_set1_ps(w);
|
|
return validateFloat(a, w, w, w, w);
|
|
}
|
|
|
|
bool test_mm_set_ps(float x, float y, float z, float w)
|
|
{
|
|
__m128 a = _mm_set_ps(x, y, z, w);
|
|
return validateFloat(a, x, y, z, w);
|
|
}
|
|
|
|
bool test_mm_set1_epi32(int32_t i)
|
|
{
|
|
__m128i a = _mm_set1_epi32(i);
|
|
return validateInt(a, i, i, i, i);
|
|
}
|
|
|
|
bool testret_mm_set_epi32(int32_t x, int32_t y, int32_t z, int32_t w)
|
|
{
|
|
__m128i a = _mm_set_epi32(x, y, z, w);
|
|
return validateInt(a, x, y, z, w);
|
|
}
|
|
|
|
__m128i test_mm_set_epi32(int32_t x, int32_t y, int32_t z, int32_t w)
|
|
{
|
|
__m128i a = _mm_set_epi32(x, y, z, w);
|
|
validateInt(a, x, y, z, w);
|
|
return a;
|
|
}
|
|
|
|
bool test_mm_store_ps(float *p, float x, float y, float z, float w)
|
|
{
|
|
__m128 a = _mm_set_ps(x, y, z, w);
|
|
_mm_store_ps(p, a);
|
|
ASSERT_RETURN(p[0] == w);
|
|
ASSERT_RETURN(p[1] == z);
|
|
ASSERT_RETURN(p[2] == y);
|
|
ASSERT_RETURN(p[3] == x);
|
|
return true;
|
|
}
|
|
|
|
bool test_mm_store_ps(int32_t *p, int32_t x, int32_t y, int32_t z, int32_t w)
|
|
{
|
|
__m128i a = _mm_set_epi32(x, y, z, w);
|
|
_mm_store_ps((float *)p, *(const __m128 *)&a);
|
|
ASSERT_RETURN(p[0] == w);
|
|
ASSERT_RETURN(p[1] == z);
|
|
ASSERT_RETURN(p[2] == y);
|
|
ASSERT_RETURN(p[3] == x);
|
|
return true;
|
|
}
|
|
|
|
bool test_mm_load1_ps(const float *p)
|
|
{
|
|
__m128 a = _mm_load1_ps(p);
|
|
return validateFloat(a, p[0], p[0], p[0], p[0]);
|
|
}
|
|
|
|
__m128 test_mm_load_ps(const float *p)
|
|
{
|
|
__m128 a = _mm_load_ps(p);
|
|
validateFloat(a, p[3], p[2], p[1], p[0]);
|
|
return a;
|
|
}
|
|
|
|
__m128i test_mm_load_ps(const int32_t *p)
|
|
{
|
|
__m128 a = _mm_load_ps((const float *)p);
|
|
__m128i ia = *(const __m128i *)&a;
|
|
validateInt(ia, p[3], p[2], p[1], p[0]);
|
|
return ia;
|
|
}
|
|
|
|
|
|
//r0 := ~a0 & b0
|
|
//r1 := ~a1 & b1
|
|
//r2 := ~a2 & b2
|
|
//r3 := ~a3 & b3
|
|
bool test_mm_andnot_ps(const float *_a, const float *_b)
|
|
{
|
|
bool r = false;
|
|
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
__m128 c = _mm_andnot_ps(a, b);
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ~ia[0] & ib[0];
|
|
uint32_t r1 = ~ia[1] & ib[1];
|
|
uint32_t r2 = ~ia[2] & ib[2];
|
|
uint32_t r3 = ~ia[3] & ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
r = validateInt(*(const __m128i *)&c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
r = validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
bool test_mm_and_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
__m128 c = _mm_and_ps(a, b);
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ia[0] & ib[0];
|
|
uint32_t r1 = ia[1] & ib[1];
|
|
uint32_t r2 = ia[2] & ib[2];
|
|
uint32_t r3 = ia[3] & ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
bool r = validateInt(*(const __m128i *)&c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
r = validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
bool test_mm_or_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
__m128 c = _mm_or_ps(a, b);
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ia[0] | ib[0];
|
|
uint32_t r1 = ia[1] | ib[1];
|
|
uint32_t r2 = ia[2] | ib[2];
|
|
uint32_t r3 = ia[3] | ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
bool r = validateInt(*(const __m128i *)&c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
r = validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
|
|
bool test_mm_andnot_si128(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
bool r = true;
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
__m128 fc = _mm_andnot_ps(*(const __m128 *)&a, *(const __m128 *)&b);
|
|
__m128i c = *(const __m128i *)&fc;
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ~ia[0] & ib[0];
|
|
uint32_t r1 = ~ia[1] & ib[1];
|
|
uint32_t r2 = ~ia[2] & ib[2];
|
|
uint32_t r3 = ~ia[3] & ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
r = validateInt(c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
bool test_mm_and_si128(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
__m128 fc = _mm_and_ps(*(const __m128 *)&a, *(const __m128 *)&b);
|
|
__m128i c = *(const __m128i *)&fc;
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ia[0] & ib[0];
|
|
uint32_t r1 = ia[1] & ib[1];
|
|
uint32_t r2 = ia[2] & ib[2];
|
|
uint32_t r3 = ia[3] & ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
bool r = validateInt(c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
r = validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
bool test_mm_or_si128(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
__m128 fc = _mm_or_ps(*(const __m128 *)&a, *(const __m128 *)&b);
|
|
__m128i c = *(const __m128i *)&fc;
|
|
// now for the assertion...
|
|
const uint32_t *ia = (const uint32_t *)&a;
|
|
const uint32_t *ib = (const uint32_t *)&b;
|
|
uint32_t r0 = ia[0] | ib[0];
|
|
uint32_t r1 = ia[1] | ib[1];
|
|
uint32_t r2 = ia[2] | ib[2];
|
|
uint32_t r3 = ia[3] | ib[3];
|
|
__m128i ret = test_mm_set_epi32(r3, r2, r1, r0);
|
|
bool r = validateInt(c, r3, r2, r1, r0);
|
|
if (r)
|
|
{
|
|
r = validateInt(ret, r3, r2, r1, r0);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
bool test_mm_movemask_ps(const float *p)
|
|
{
|
|
int ret = 0;
|
|
|
|
const uint32_t *ip = (const uint32_t *)p;
|
|
if (ip[0] & 0x80000000)
|
|
{
|
|
ret |= 1;
|
|
}
|
|
if (ip[1] & 0x80000000)
|
|
{
|
|
ret |= 2;
|
|
}
|
|
if (ip[2] & 0x80000000)
|
|
{
|
|
ret |= 4;
|
|
}
|
|
if (ip[3] & 0x80000000)
|
|
{
|
|
ret |= 8;
|
|
}
|
|
__m128 a = test_mm_load_ps(p);
|
|
int val = _mm_movemask_ps(a);
|
|
return val == ret ? true : false;
|
|
}
|
|
|
|
// Note, NEON does not have a general purpose shuffled command like SSE.
|
|
// When invoking this method, there is special code for a number of the most
|
|
// common shuffle permutations
|
|
bool test_mm_shuffle_ps(const float *_a, const float *_b)
|
|
{
|
|
bool isValid = true;
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
// Test many permutations of the shuffle operation, including all permutations which have an optmized/custom implementation
|
|
__m128 ret;
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(0, 1, 2, 3));
|
|
if (!validateFloat(ret, _b[0], _b[1], _a[2], _a[3]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));
|
|
if (!validateFloat(ret, _b[3], _b[2], _a[1], _a[0]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(0, 0, 1, 1));
|
|
if (!validateFloat(ret, _b[0], _b[0], _a[1], _a[1]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 1, 0, 2));
|
|
if (!validateFloat(ret, _b[3], _b[1], _a[0], _a[2]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 0, 3, 2));
|
|
if (!validateFloat(ret, _b[1], _b[0], _a[3], _a[2]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 3, 0, 1));
|
|
if (!validateFloat(ret, _b[2], _b[3], _a[0], _a[1]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(0, 0, 2, 2));
|
|
if (!validateFloat(ret, _b[0], _b[0], _a[2], _a[2]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 2, 0, 0));
|
|
if (!validateFloat(ret, _b[2], _b[2], _a[0], _a[0]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 0, 2));
|
|
if (!validateFloat(ret, _b[3], _b[2], _a[0], _a[2]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(1, 1, 3, 3));
|
|
if (!validateFloat(ret, _b[1], _b[1], _a[3], _a[3]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 1, 0));
|
|
if (!validateFloat(ret, _b[2], _b[0], _a[1], _a[0]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 0, 1));
|
|
if (!validateFloat(ret, _b[2], _b[0], _a[0], _a[1]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
ret = _mm_shuffle_ps(a, b, _MM_SHUFFLE(2, 0, 3, 2));
|
|
if (!validateFloat(ret, _b[2], _b[0], _a[3], _a[2]))
|
|
{
|
|
isValid = false;
|
|
}
|
|
|
|
return isValid;
|
|
}
|
|
|
|
bool test_mm_movemask_epi8(const int32_t *_a)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
|
|
const uint8_t *ip = (const uint8_t *)_a;
|
|
int ret = 0;
|
|
uint32_t mask = 1;
|
|
for (uint32_t i = 0; i < 16; i++)
|
|
{
|
|
if (ip[i] & 0x80)
|
|
{
|
|
ret |= mask;
|
|
}
|
|
mask = mask << 1;
|
|
}
|
|
int test = _mm_movemask_epi8(a);
|
|
ASSERT_RETURN(test == ret);
|
|
return true;
|
|
}
|
|
|
|
bool test_mm_sub_ps(const float *_a, const float *_b)
|
|
{
|
|
float dx = _a[0] - _b[0];
|
|
float dy = _a[1] - _b[1];
|
|
float dz = _a[2] - _b[2];
|
|
float dw = _a[3] - _b[3];
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
__m128 c = _mm_sub_ps(a, b);
|
|
return validateFloat(c, dw, dz, dy, dx);
|
|
}
|
|
|
|
bool test_mm_sub_epi32(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
int32_t dx = _a[0] - _b[0];
|
|
int32_t dy = _a[1] - _b[1];
|
|
int32_t dz = _a[2] - _b[2];
|
|
int32_t dw = _a[3] - _b[3];
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
|
|
__m128i c = _mm_sub_epi32(a, b);
|
|
return validateInt(c, dw, dz, dy, dx);
|
|
}
|
|
|
|
bool test_mm_add_ps(const float *_a, const float *_b)
|
|
{
|
|
float dx = _a[0] + _b[0];
|
|
float dy = _a[1] + _b[1];
|
|
float dz = _a[2] + _b[2];
|
|
float dw = _a[3] + _b[3];
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
__m128 c = _mm_add_ps(a, b);
|
|
return validateFloat(c, dw, dz, dy, dx);
|
|
}
|
|
|
|
bool test_mm_add_epi32(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
int32_t dx = _a[0] + _b[0];
|
|
int32_t dy = _a[1] + _b[1];
|
|
int32_t dz = _a[2] + _b[2];
|
|
int32_t dw = _a[3] + _b[3];
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
|
|
__m128i c = _mm_add_epi32(a, b);
|
|
return validateInt(c, dw, dz, dy, dx);
|
|
}
|
|
|
|
bool test_mm_mullo_epi16(const int16_t *_a, const int16_t *_b)
|
|
{
|
|
int16_t d0 = _a[0] * _b[0];
|
|
int16_t d1 = _a[1] * _b[1];
|
|
int16_t d2 = _a[2] * _b[2];
|
|
int16_t d3 = _a[3] * _b[3];
|
|
int16_t d4 = _a[4] * _b[4];
|
|
int16_t d5 = _a[5] * _b[5];
|
|
int16_t d6 = _a[6] * _b[6];
|
|
int16_t d7 = _a[7] * _b[7];
|
|
|
|
__m128i a = test_mm_load_ps((const int32_t *)_a);
|
|
__m128i b = test_mm_load_ps((const int32_t *)_b);
|
|
|
|
__m128i c = _mm_mullo_epi16(a, b);
|
|
return validateInt16(c, d0, d1, d2, d3, d4, d5, d6, d7);
|
|
}
|
|
|
|
bool test_mm_mul_ps(const float *_a, const float *_b)
|
|
{
|
|
float dx = _a[0] * _b[0];
|
|
float dy = _a[1] * _b[1];
|
|
float dz = _a[2] * _b[2];
|
|
float dw = _a[3] * _b[3];
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
__m128 c = _mm_mul_ps(a, b);
|
|
return validateFloat(c, dw, dz, dy, dx);
|
|
}
|
|
|
|
bool test_mm_rcp_ps(const float *_a)
|
|
{
|
|
float dx = 1.0f / _a[0];
|
|
float dy = 1.0f / _a[1];
|
|
float dz = 1.0f / _a[2];
|
|
float dw = 1.0f / _a[3];
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 c = _mm_rcp_ps(a);
|
|
return validateFloatEpsilon(c, dw, dz, dy, dx, 300.0f);
|
|
}
|
|
|
|
bool test_mm_max_ps(const float *_a, const float *_b)
|
|
{
|
|
float c[4];
|
|
|
|
c[0] = _a[0] > _b[0] ? _a[0] : _b[0];
|
|
c[1] = _a[1] > _b[1] ? _a[1] : _b[1];
|
|
c[2] = _a[2] > _b[2] ? _a[2] : _b[2];
|
|
c[3] = _a[3] > _b[3] ? _a[3] : _b[3];
|
|
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
__m128 ret = _mm_max_ps(a, b);
|
|
return validateFloat(ret, c[3], c[2], c[1], c[0]);
|
|
|
|
}
|
|
|
|
bool test_mm_min_ps(const float *_a, const float *_b)
|
|
{
|
|
float c[4];
|
|
|
|
c[0] = _a[0] < _b[0] ? _a[0] : _b[0];
|
|
c[1] = _a[1] < _b[1] ? _a[1] : _b[1];
|
|
c[2] = _a[2] < _b[2] ? _a[2] : _b[2];
|
|
c[3] = _a[3] < _b[3] ? _a[3] : _b[3];
|
|
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
__m128 ret = _mm_min_ps(a, b);
|
|
return validateFloat(ret, c[3], c[2], c[1], c[0]);
|
|
}
|
|
|
|
bool test_mm_min_epi16(const int16_t *_a, const int16_t *_b)
|
|
{
|
|
int16_t d0 = _a[0] < _b[0] ? _a[0] : _b[0];
|
|
int16_t d1 = _a[1] < _b[1] ? _a[1] : _b[1];
|
|
int16_t d2 = _a[2] < _b[2] ? _a[2] : _b[2];
|
|
int16_t d3 = _a[3] < _b[3] ? _a[3] : _b[3];
|
|
int16_t d4 = _a[4] < _b[4] ? _a[4] : _b[4];
|
|
int16_t d5 = _a[5] < _b[5] ? _a[5] : _b[5];
|
|
int16_t d6 = _a[6] < _b[6] ? _a[6] : _b[6];
|
|
int16_t d7 = _a[7] < _b[7] ? _a[7] : _b[7];
|
|
|
|
__m128i a = test_mm_load_ps((const int32_t *)_a);
|
|
__m128i b = test_mm_load_ps((const int32_t *)_b);
|
|
|
|
__m128i c = _mm_min_epi16(a, b);
|
|
return validateInt16(c, d0, d1, d2, d3, d4, d5, d6, d7);
|
|
}
|
|
|
|
bool test_mm_mulhi_epi16(const int16_t *_a, const int16_t *_b)
|
|
{
|
|
int16_t d[8];
|
|
for (uint32_t i = 0; i < 8; i++)
|
|
{
|
|
int32_t m = (int32_t)_a[i] * (int32_t)_b[i];
|
|
d[i] = (int16_t)(m >> 16);
|
|
}
|
|
|
|
__m128i a = test_mm_load_ps((const int32_t *)_a);
|
|
__m128i b = test_mm_load_ps((const int32_t *)_b);
|
|
|
|
__m128i c = _mm_mulhi_epi16(a, b);
|
|
return validateInt16(c, d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
|
|
}
|
|
|
|
bool test_mm_cmplt_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] < _b[0] ? -1 : 0;
|
|
result[1] = _a[1] < _b[1] ? -1 : 0;
|
|
result[2] = _a[2] < _b[2] ? -1 : 0;
|
|
result[3] = _a[3] < _b[3] ? -1 : 0;
|
|
|
|
__m128 ret = _mm_cmplt_ps(a, b);
|
|
__m128i iret = *(const __m128i *)&ret;
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
bool test_mm_cmpgt_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] > _b[0] ? -1 : 0;
|
|
result[1] = _a[1] > _b[1] ? -1 : 0;
|
|
result[2] = _a[2] > _b[2] ? -1 : 0;
|
|
result[3] = _a[3] > _b[3] ? -1 : 0;
|
|
|
|
__m128 ret = _mm_cmpgt_ps(a, b);
|
|
__m128i iret = *(const __m128i *)&ret;
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
bool test_mm_cmpge_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] >= _b[0] ? -1 : 0;
|
|
result[1] = _a[1] >= _b[1] ? -1 : 0;
|
|
result[2] = _a[2] >= _b[2] ? -1 : 0;
|
|
result[3] = _a[3] >= _b[3] ? -1 : 0;
|
|
|
|
__m128 ret = _mm_cmpge_ps(a, b);
|
|
__m128i iret = *(const __m128i *)&ret;
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
bool test_mm_cmple_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] <= _b[0] ? -1 : 0;
|
|
result[1] = _a[1] <= _b[1] ? -1 : 0;
|
|
result[2] = _a[2] <= _b[2] ? -1 : 0;
|
|
result[3] = _a[3] <= _b[3] ? -1 : 0;
|
|
|
|
__m128 ret = _mm_cmple_ps(a, b);
|
|
__m128i iret = *(const __m128i *)&ret;
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
bool test_mm_cmpeq_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] == _b[0] ? -1 : 0;
|
|
result[1] = _a[1] == _b[1] ? -1 : 0;
|
|
result[2] = _a[2] == _b[2] ? -1 : 0;
|
|
result[3] = _a[3] == _b[3] ? -1 : 0;
|
|
|
|
__m128 ret = _mm_cmpeq_ps(a, b);
|
|
__m128i iret = *(const __m128i *)&ret;
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
|
|
bool test_mm_cmplt_epi32(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
result[0] = _a[0] < _b[0] ? -1 : 0;
|
|
result[1] = _a[1] < _b[1] ? -1 : 0;
|
|
result[2] = _a[2] < _b[2] ? -1 : 0;
|
|
result[3] = _a[3] < _b[3] ? -1 : 0;
|
|
|
|
__m128i iret = _mm_cmplt_epi32(a, b);
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
bool test_mm_cmpgt_epi32(const int32_t *_a, const int32_t *_b)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
__m128i b = test_mm_load_ps(_b);
|
|
|
|
int32_t result[4];
|
|
|
|
result[0] = _a[0] > _b[0] ? -1 : 0;
|
|
result[1] = _a[1] > _b[1] ? -1 : 0;
|
|
result[2] = _a[2] > _b[2] ? -1 : 0;
|
|
result[3] = _a[3] > _b[3] ? -1 : 0;
|
|
|
|
__m128i iret = _mm_cmpgt_epi32(a, b);
|
|
return validateInt(iret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
|
|
|
|
|
|
float compord(float a, float b)
|
|
{
|
|
float ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if ( !isNANA && !isNANB)
|
|
{
|
|
ret = getNAN();
|
|
}
|
|
else
|
|
{
|
|
ret = 0.0f;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_cmpord_ps(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
float result[4];
|
|
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
{
|
|
result[i] = compord(_a[i], _b[i]);
|
|
}
|
|
|
|
__m128 ret = _mm_cmpord_ps(a, b);
|
|
|
|
return validateFloat(ret, result[3], result[2], result[1], result[0]);
|
|
}
|
|
//********************************************
|
|
int32_t comilt_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a < b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 0; // **NOTE** The documentation on MSDN is in error! The actual hardware returns a 0, not a 1 if either of the values is a NAN!
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
bool test_mm_comilt_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
|
|
int32_t result = comilt_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comilt_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
//********************************************
|
|
int32_t comigt_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a > b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 0; // **NOTE** The documentation on MSDN is in error! The actual hardware returns a 0, not a 1 if either of the values is a NAN!
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_comigt_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
|
|
int32_t result = comigt_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comigt_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
//********************************************
|
|
int32_t comile_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a <= b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 0; // **NOTE** The documentation on MSDN is in error! The actual hardware returns a 0, not a 1 if either of the values is a NAN!
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_comile_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
|
|
int32_t result = comile_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comile_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
//********************************************
|
|
int32_t comige_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a >= b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 0; // **NOTE** The documentation on MSDN is in error! The actual hardware returns a 0, not a 1 if either of the values is a NAN!
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_comige_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
|
|
int32_t result = comige_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comige_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
//********************************************
|
|
int32_t comieq_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a == b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 0; // **NOTE** The documentation on MSDN is in error! The actual hardware returns a 0, not a 1 if either of the values is a NAN!
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_comieq_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
int32_t result = comieq_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comieq_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
|
|
//********************************************
|
|
int32_t comineq_ss(float a, float b)
|
|
{
|
|
int32_t ret;
|
|
|
|
bool isNANA = isNAN(a);
|
|
bool isNANB = isNAN(b);
|
|
if (!isNANA && !isNANB)
|
|
{
|
|
ret = a != b ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
ret = 1;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool test_mm_comineq_ss(const float *_a, const float *_b)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
__m128 b = test_mm_load_ps(_b);
|
|
|
|
|
|
int32_t result = comineq_ss(_a[0], _b[0]);
|
|
|
|
int32_t ret = _mm_comineq_ss(a, b);
|
|
|
|
return result == ret ? true : false;
|
|
}
|
|
//********************************************
|
|
|
|
|
|
|
|
|
|
bool test_mm_cvttps_epi32(const float *_a)
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
int32_t trun[4];
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
{
|
|
trun[i] = (int32_t)_a[i];
|
|
}
|
|
|
|
__m128i ret = _mm_cvttps_epi32(a);
|
|
return validateInt(ret, trun[3], trun[2], trun[1], trun[0]);
|
|
}
|
|
|
|
bool test_mm_cvtepi32_ps(const int32_t *_a)
|
|
{
|
|
__m128i a = test_mm_load_ps(_a);
|
|
float trun[4];
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
{
|
|
trun[i] = (float)_a[i];
|
|
}
|
|
|
|
__m128 ret = _mm_cvtepi32_ps(a);
|
|
return validateFloat(ret, trun[3], trun[2], trun[1], trun[0]);
|
|
}
|
|
|
|
// https://msdn.microsoft.com/en-us/library/xdc42k5e%28v=vs.90%29.aspx?f=255&MSPPError=-2147217396
|
|
bool test_mm_cvtps_epi32(const float _a[4])
|
|
{
|
|
__m128 a = test_mm_load_ps(_a);
|
|
int32_t trun[4];
|
|
for (uint32_t i = 0; i < 4; i++)
|
|
{
|
|
trun[i] = (int32_t)(bankersRounding(_a[i]));
|
|
}
|
|
|
|
__m128i ret = _mm_cvtps_epi32(a);
|
|
return validateInt(ret, trun[3], trun[2], trun[1], trun[0]);
|
|
}
|
|
|
|
|
|
// Try 10,000 random floating point values for each test we run
|
|
#define MAX_TEST_VALUE 10000
|
|
|
|
|
|
class SSE2NEONTestImpl : public SSE2NEONTest
|
|
{
|
|
public:
|
|
SSE2NEONTestImpl(void)
|
|
{
|
|
mTestFloatPointer1 = (float *)platformAlignedAlloc(sizeof(__m128));
|
|
mTestFloatPointer2 = (float *)platformAlignedAlloc(sizeof(__m128));
|
|
mTestIntPointer1 = (int32_t *)platformAlignedAlloc(sizeof(__m128i));
|
|
mTestIntPointer2 = (int32_t *)platformAlignedAlloc(sizeof(__m128i));
|
|
srand(0);
|
|
for (uint32_t i = 0; i < MAX_TEST_VALUE; i++)
|
|
{
|
|
mTestFloats[i] = ranf(-100000, 100000);
|
|
mTestInts[i] = (int32_t)ranf(-100000, 100000);
|
|
}
|
|
}
|
|
|
|
virtual ~SSE2NEONTestImpl(void)
|
|
{
|
|
platformAlignedFree(mTestFloatPointer1);
|
|
platformAlignedFree(mTestFloatPointer2);
|
|
platformAlignedFree(mTestIntPointer1);
|
|
platformAlignedFree(mTestIntPointer2);
|
|
}
|
|
|
|
bool loadTestFloatPointers(uint32_t i)
|
|
{
|
|
bool ret = test_mm_store_ps(mTestFloatPointer1, mTestFloats[i], mTestFloats[i + 1], mTestFloats[i + 2], mTestFloats[i + 3]);
|
|
if (ret)
|
|
{
|
|
ret = test_mm_store_ps(mTestFloatPointer2, mTestFloats[i + 4], mTestFloats[i + 5], mTestFloats[i + 6], mTestFloats[i + 7]);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
bool loadTestIntPointers(uint32_t i)
|
|
{
|
|
bool ret = test_mm_store_ps(mTestIntPointer1, mTestInts[i], mTestInts[i + 1], mTestInts[i + 2], mTestInts[i + 3]);
|
|
if (ret)
|
|
{
|
|
ret = test_mm_store_ps(mTestIntPointer2, mTestInts[i + 4], mTestInts[i + 5], mTestInts[i + 6], mTestInts[i + 7]);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
bool runSingleTest(InstructionTest test,uint32_t i)
|
|
{
|
|
bool ret = true;
|
|
|
|
switch ( test )
|
|
{
|
|
case IT_MM_SETZERO_SI128:
|
|
ret = test_mm_setzero_si128();
|
|
break;
|
|
case IT_MM_SETZERO_PS:
|
|
ret = test_mm_setzero_ps();
|
|
break;
|
|
case IT_MM_SET1_PS:
|
|
ret = test_mm_set1_ps(mTestFloats[i]);
|
|
break;
|
|
case IT_MM_SET_PS1:
|
|
ret = test_mm_set1_ps(mTestFloats[i]);
|
|
break;
|
|
case IT_MM_SET_PS:
|
|
ret = test_mm_set_ps(mTestFloats[i], mTestFloats[i + 1], mTestFloats[i + 2], mTestFloats[i + 3]);
|
|
break;
|
|
case IT_MM_SET1_EPI32:
|
|
ret = test_mm_set1_epi32(mTestInts[i]);
|
|
break;
|
|
case IT_MM_SET_EPI32:
|
|
ret = testret_mm_set_epi32(mTestInts[i], mTestInts[i + 1], mTestInts[i + 2], mTestInts[i + 3]);
|
|
break;
|
|
case IT_MM_STORE_PS:
|
|
ret = test_mm_store_ps(mTestIntPointer1, mTestInts[i], mTestInts[i + 1], mTestInts[i + 2], mTestInts[i + 3]);
|
|
break;
|
|
case IT_MM_LOAD1_PS:
|
|
ret = test_mm_load1_ps(mTestFloatPointer1);
|
|
break;
|
|
case IT_MM_ANDNOT_PS:
|
|
ret = test_mm_andnot_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_ANDNOT_SI128:
|
|
ret = test_mm_andnot_si128(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_AND_SI128:
|
|
ret = test_mm_and_si128(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_AND_PS:
|
|
ret = test_mm_and_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_OR_PS:
|
|
ret = test_mm_or_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_OR_SI128:
|
|
ret = test_mm_or_si128(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_MOVEMASK_PS:
|
|
ret = test_mm_movemask_ps(mTestFloatPointer1);
|
|
break;
|
|
case IT_MM_SHUFFLE_PS:
|
|
ret = test_mm_shuffle_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_MOVEMASK_EPI8:
|
|
ret = test_mm_movemask_epi8(mTestIntPointer1);
|
|
break;
|
|
case IT_MM_SUB_PS:
|
|
ret = test_mm_sub_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_SUB_EPI32:
|
|
ret = test_mm_sub_epi32(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_ADD_PS:
|
|
ret = test_mm_add_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_ADD_EPI32:
|
|
ret = test_mm_add_epi32(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_MULLO_EPI16:
|
|
ret = test_mm_mullo_epi16((const int16_t *)mTestIntPointer1, (const int16_t *)mTestIntPointer2);
|
|
break;
|
|
case IT_MM_MUL_PS:
|
|
ret = test_mm_mul_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_RCP_PS:
|
|
ret = test_mm_rcp_ps(mTestFloatPointer1);
|
|
break;
|
|
case IT_MM_MAX_PS:
|
|
ret = test_mm_max_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_MIN_PS:
|
|
ret = test_mm_min_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_MIN_EPI16:
|
|
ret = test_mm_min_epi16((const int16_t *)mTestIntPointer1, (const int16_t *)mTestIntPointer2);
|
|
break;
|
|
case IT_MM_MULHI_EPI16:
|
|
ret = test_mm_mulhi_epi16((const int16_t *)mTestIntPointer1, (const int16_t *)mTestIntPointer2);
|
|
break;
|
|
case IT_MM_CMPLT_PS:
|
|
ret = test_mm_cmplt_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_CMPGT_PS:
|
|
ret = test_mm_cmpgt_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_CMPGE_PS:
|
|
ret = test_mm_cmpge_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_CMPLE_PS:
|
|
ret = test_mm_cmple_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_CMPEQ_PS:
|
|
ret = test_mm_cmpeq_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_CMPLT_EPI32:
|
|
ret = test_mm_cmplt_epi32(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_CMPGT_EPI32:
|
|
ret = test_mm_cmpgt_epi32(mTestIntPointer1, mTestIntPointer2);
|
|
break;
|
|
case IT_MM_CVTTPS_EPI32:
|
|
ret = test_mm_cvttps_epi32(mTestFloatPointer1);
|
|
break;
|
|
case IT_MM_CVTEPI32_PS:
|
|
ret = test_mm_cvtepi32_ps(mTestIntPointer1);
|
|
break;
|
|
case IT_MM_CVTPS_EPI32:
|
|
ret = test_mm_cvtps_epi32(mTestFloatPointer1);
|
|
break;
|
|
case IT_MM_CMPORD_PS:
|
|
ret = test_mm_cmpord_ps(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_COMILT_SS:
|
|
ret = test_mm_comilt_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
if (!ret)
|
|
{
|
|
// Note to Alexander, you need to fix this.
|
|
ret = test_mm_comilt_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
}
|
|
break;
|
|
case IT_MM_COMIGT_SS:
|
|
ret = test_mm_comigt_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_COMILE_SS:
|
|
ret = test_mm_comile_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
if (!ret)
|
|
{
|
|
// Note to Alexander, you need to fix this.
|
|
ret = test_mm_comile_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
}
|
|
break;
|
|
case IT_MM_COMIGE_SS:
|
|
ret = test_mm_comige_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
break;
|
|
case IT_MM_COMIEQ_SS:
|
|
ret = test_mm_comieq_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
if (!ret)
|
|
{
|
|
// Note to Alexander, you need to fix this.
|
|
ret = test_mm_comieq_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
}
|
|
break;
|
|
case IT_MM_COMINEQ_SS:
|
|
ret = test_mm_comineq_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
if (!ret)
|
|
{
|
|
// Note to Alexander, you need to fix this.
|
|
ret = test_mm_comineq_ss(mTestFloatPointer1, mTestFloatPointer2);
|
|
}
|
|
break;
|
|
case IT_MM_HADD_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_MAX_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_MIN_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_MAX_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_MIN_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SQRT_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SQRT_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_RSQRT_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_DIV_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_DIV_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_MULLO_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_ADD_EPI16:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_ADD_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_DEFAULT:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_FUNCTION:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_SPLAT:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SHUFFLE_EPI32_SINGLE:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SHUFFLEHI_EPI16_FUNCTION:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_XOR_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_XOR_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_LOAD_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_LOADU_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_LOAD_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CMPNEQ_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_STOREU_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_STORE_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_STORE_SS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_STOREL_EPI64:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SETR_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CVTSI128_SI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CVTSI32_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CASTPS_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CASTSI128_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_LOAD_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_PACKS_EPI16:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_PACKUS_EPI16:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_PACKS_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI8:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI16:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKLO_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKLO_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKHI_PS:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI8:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI16:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_UNPACKHI_EPI32:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_SFENCE:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_STREAM_SI128:
|
|
ret = true;
|
|
break;
|
|
case IT_MM_CLFLUSH:
|
|
ret = true;
|
|
break;
|
|
}
|
|
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
virtual bool runTest(InstructionTest test)
|
|
{
|
|
bool ret = true;
|
|
|
|
|
|
// Test a whole bunch of values
|
|
for (uint32_t i = 0; i < (MAX_TEST_VALUE - 8); i++)
|
|
{
|
|
ret = loadTestFloatPointers(i); // Load some random float values
|
|
if ( !ret ) break; // load test float failed??
|
|
ret = loadTestIntPointers(i); // load some random int values
|
|
if ( !ret ) break; // load test float failed??
|
|
// If we are testing the reciprocal, then invert the input data (easier for debugging)
|
|
if ( test == IT_MM_RCP_PS )
|
|
{
|
|
|
|
mTestFloatPointer1[0] = 1.0f / mTestFloatPointer1[0];
|
|
mTestFloatPointer1[1] = 1.0f / mTestFloatPointer1[1];
|
|
mTestFloatPointer1[2] = 1.0f / mTestFloatPointer1[2];
|
|
mTestFloatPointer1[3] = 1.0f / mTestFloatPointer1[3];
|
|
}
|
|
if ( test == IT_MM_CMPGE_PS || test == IT_MM_CMPLE_PS || test == IT_MM_CMPEQ_PS )
|
|
{
|
|
// Make sure at least one value is the same.
|
|
mTestFloatPointer1[3] = mTestFloatPointer2[3];
|
|
}
|
|
|
|
if (test == IT_MM_CMPORD_PS ||
|
|
test == IT_MM_COMILT_SS ||
|
|
test == IT_MM_COMILE_SS ||
|
|
test == IT_MM_COMIGE_SS ||
|
|
test == IT_MM_COMIEQ_SS ||
|
|
test == IT_MM_COMINEQ_SS ||
|
|
test == IT_MM_COMIGT_SS) // if testing for NAN's make sure we have some nans
|
|
{
|
|
// One out of four times
|
|
// Make sure a couple of values have NANs for testing purposes
|
|
if ((rand() & 3) == 0)
|
|
{
|
|
uint32_t r1 = rand() & 3;
|
|
uint32_t r2 = rand() & 3;
|
|
mTestFloatPointer1[r1] = getNAN();
|
|
mTestFloatPointer2[r2] = getNAN();
|
|
}
|
|
}
|
|
|
|
// one out of every random 64 times or so, mix up the test floats to contain some integer values
|
|
if ((rand() & 63) == 0)
|
|
{
|
|
uint32_t option = rand() & 3;
|
|
switch (option)
|
|
{
|
|
// All integers..
|
|
case 0:
|
|
mTestFloatPointer1[0] = float(mTestIntPointer1[0]);
|
|
mTestFloatPointer1[1] = float(mTestIntPointer1[1]);
|
|
mTestFloatPointer1[2] = float(mTestIntPointer1[2]);
|
|
mTestFloatPointer1[3] = float(mTestIntPointer1[3]);
|
|
|
|
mTestFloatPointer2[0] = float(mTestIntPointer2[0]);
|
|
mTestFloatPointer2[1] = float(mTestIntPointer2[1]);
|
|
mTestFloatPointer2[2] = float(mTestIntPointer2[2]);
|
|
mTestFloatPointer2[3] = float(mTestIntPointer2[3]);
|
|
|
|
break;
|
|
case 1:
|
|
{
|
|
uint32_t index = rand() & 3;
|
|
mTestFloatPointer1[index] = float(mTestIntPointer1[index]);
|
|
index = rand() & 3;
|
|
mTestFloatPointer2[index] = float(mTestIntPointer2[index]);
|
|
}
|
|
break;
|
|
case 2:
|
|
{
|
|
uint32_t index1 = rand() & 3;
|
|
uint32_t index2 = rand() & 3;
|
|
mTestFloatPointer1[index1] = float(mTestIntPointer1[index1]);
|
|
mTestFloatPointer1[index2] = float(mTestIntPointer1[index2]);
|
|
index1 = rand() & 3;
|
|
index2 = rand() & 3;
|
|
mTestFloatPointer1[index1] = float(mTestIntPointer1[index1]);
|
|
mTestFloatPointer1[index2] = float(mTestIntPointer1[index2]);
|
|
}
|
|
break;
|
|
case 3:
|
|
mTestFloatPointer1[0] = float(mTestIntPointer1[0]);
|
|
mTestFloatPointer1[1] = float(mTestIntPointer1[1]);
|
|
mTestFloatPointer1[2] = float(mTestIntPointer1[2]);
|
|
mTestFloatPointer1[3] = float(mTestIntPointer1[3]);
|
|
break;
|
|
}
|
|
if ((rand() & 3) == 0) // one out of 4 times, make halves
|
|
{
|
|
for (uint32_t j = 0; j < 4; j++)
|
|
{
|
|
mTestFloatPointer1[j] *= 0.5f;
|
|
mTestFloatPointer2[j] *= 0.5f;
|
|
}
|
|
}
|
|
}
|
|
#if 0
|
|
{
|
|
mTestFloatPointer1[0] = getNAN();
|
|
mTestFloatPointer2[0] = getNAN();
|
|
bool ok = test_mm_comilt_ss(mTestFloatPointer1, mTestFloatPointer1);
|
|
if (!ok)
|
|
{
|
|
printf("Debug me");
|
|
}
|
|
}
|
|
#endif
|
|
ret = runSingleTest(test,i);
|
|
if ( !ret ) // the test failed...
|
|
{
|
|
// Set a breakpoint here if you want to step through the failure case in the debugger
|
|
ret = runSingleTest(test,i);
|
|
break;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
virtual void release(void)
|
|
{
|
|
delete this;
|
|
}
|
|
|
|
float *mTestFloatPointer1;
|
|
float *mTestFloatPointer2;
|
|
int32_t *mTestIntPointer1;
|
|
int32_t *mTestIntPointer2;
|
|
float mTestFloats[MAX_TEST_VALUE];
|
|
int32_t mTestInts[MAX_TEST_VALUE];
|
|
};
|
|
|
|
SSE2NEONTest *SSE2NEONTest::create(void)
|
|
{
|
|
SSE2NEONTestImpl *st = new SSE2NEONTestImpl;
|
|
return static_cast<SSE2NEONTest *>(st);
|
|
}
|
|
|
|
} // end of SSE2NEON namespace
|