Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement FMA function _mm_fmadd_pd #245

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions sse2neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -3630,6 +3630,21 @@ FORCE_INLINE __m128 _mm_fmadd_ps(__m128 a, __m128 b, __m128 c)
#endif
}

// Fused multiply-add on packed double-precision (64-bit) floating-point
// elements: computes a * b + c per lane and returns the result as dst.
// On AArch64 this maps onto vfmaq_f64; on other targets it is composed from
// _mm_mul_pd followed by _mm_add_pd, i.e. a separate multiply and add.
// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_fmadd_pd
FORCE_INLINE __m128d _mm_fmadd_pd(__m128d a, __m128d b, __m128d c)
{
#if defined(__aarch64__)
    // vfmaq_f64 takes the addend first, then the two multiplicands.
    float64x2_t addend = vreinterpretq_f64_m128d(c);
    float64x2_t lhs = vreinterpretq_f64_m128d(b);
    float64x2_t rhs = vreinterpretq_f64_m128d(a);
    return vreinterpretq_m128d_f64(vfmaq_f64(addend, lhs, rhs));
#else
    __m128d product = _mm_mul_pd(a, b);
    return _mm_add_pd(product, c);
#endif
}

// Alternatively add and subtract packed single-precision (32-bit)
// floating-point elements in a to/from packed elements in b, and store the
// results in dst.
Expand Down
10 changes: 10 additions & 0 deletions tests/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,4 +345,14 @@ result_t validateDouble(__m128d a, double d0, double d1)
return TEST_SUCCESS;
}

// Checks the two double-precision lanes of `a` against expected values within
// an absolute tolerance.
//   a       - vector under test; lane 0 is compared to d0, lane 1 to d1
//   d0, d1  - expected values for lanes 0 and 1
//   epsilon - tolerance; each check is the strict test |actual - expected| < epsilon
// Returns TEST_SUCCESS when both lanes pass. A failed check is handed to
// ASSERT_RETURN (presumably an early failure return -- confirm against the
// macro's definition). A NaN difference never satisfies the comparison.
result_t validateDoubleEpsilon(__m128d a, double d0, double d1, double epsilon)
{
    // View the 128-bit vector as two consecutive doubles.
    const double *t = (const double *) &a;
    double df0 = fabs(t[0] - d0);
    double df1 = fabs(t[1] - d1);
    ASSERT_RETURN(df0 < epsilon);
    ASSERT_RETURN(df1 < epsilon);
    return TEST_SUCCESS;
}

} // namespace SSE2NEON
1 change: 1 addition & 0 deletions tests/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ result_t validateFloatError(__m128 a,
float f3,
float err);
result_t validateDouble(__m128d a, double d0, double d1);
// Succeeds when lane 0 of `a` is within `epsilon` of d0 and lane 1 is within
// `epsilon` of d1 (absolute difference, strict comparison).
result_t validateDoubleEpsilon(__m128d a, double d0, double d1, double epsilon);
} // namespace SSE2NEON

#endif
20 changes: 20 additions & 0 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7404,6 +7404,26 @@ result_t test_mm_fmadd_ps(const SSE2NEONTestImpl &impl, uint32_t i)
return TEST_UNIMPL;
}

// Tests _mm_fmadd_pd: builds three 2-lane double vectors from the first six
// entries of impl.mTestFloats (widened to double), computes a * b + c with the
// intrinsic, and compares each lane against a scalar reference.
// The iteration index `i` is not used by this test.
result_t test_mm_fmadd_pd(const SSE2NEONTestImpl &impl, uint32_t i)
{
    const double _a[2]{(double) impl.mTestFloats[0],
                       (double) impl.mTestFloats[1]};
    const double _b[2]{(double) impl.mTestFloats[2],
                       (double) impl.mTestFloats[3]};
    const double _c[2]{(double) impl.mTestFloats[4],
                       (double) impl.mTestFloats[5]};

    __m128d a = _mm_load_pd(_a);
    __m128d b = _mm_load_pd(_b);
    __m128d c = _mm_load_pd(_c);

    // Scalar reference values, one per lane.
    double f0 = _a[0] * _b[0] + _c[0];
    double f1 = _a[1] * _b[1] + _c[1];

    __m128d d = _mm_fmadd_pd(a, b, c);
    // The reference may be rounded differently from a fused multiply-add, so
    // compare within a tolerance. The tolerance is a double literal to match
    // validateDoubleEpsilon's `double epsilon` parameter.
    return validateDoubleEpsilon(d, f0, f1, 0.0001);
}

/* Others */
result_t test_mm_clmulepi64_si128(const SSE2NEONTestImpl &impl, uint32_t i)
{
Expand Down
1 change: 1 addition & 0 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@
TYPE(mm_aeskeygenassist_si128) \
/* FMA */ \
TYPE(mm_fmadd_ps) \
TYPE(mm_fmadd_pd) \
/* Others */ \
TYPE(mm_clmulepi64_si128) \
TYPE(mm_popcnt_u32) \
Expand Down