Arm Neon           

Arm NEON intrinsics interface

The header file “arm_neon.h” is available after installing Visual Studio 2017.

Example code in C:

/*
 * Stop the build early when NEON intrinsics are not available.
 *
 *   __ARM_NEON    - ACLE feature-test macro (current GCC/Clang).
 *   __ARM_NEON__  - legacy spelling defined by older GCC/Clang releases.
 *   _M_ARM,
 *   _M_ARM64      - MSVC target macros; MSVC does not define the ACLE macros.
 *                   NOTE(review): NEON is assumed to be present on every MSVC
 *                   Arm target - confirm for the targets you build for.
 */
#if !defined(__ARM_NEON) && !defined(__ARM_NEON__) && \
    !defined(_M_ARM) && !defined(_M_ARM64)
#error "NEON support not enabled"
#endif

#include <arm_neon.h>

/*
 * Six NEON registers of four 32-bit floats each, used as the operand type of
 * exampleFunction().
 *
 * NOTE(review): judging by the field names, "re"/"im" are the real and
 * imaginary parts of complex samples, and "H"/"V" are two separate channels
 * (H holding 8 samples split into low/high halves, V holding 4) - confirm
 * with the data producer.
 */
typedef struct {
  /* H channel, first four lanes. */
  float32x4_t H_re_low;
  /* H channel, last four lanes. */
  float32x4_t H_re_high;
  /* Imaginary counterparts of H_re_low / H_re_high. */
  float32x4_t H_im_low;
  float32x4_t H_im_high;
  /* V channel: a single register per component. */
  float32x4_t V_re;
  float32x4_t V_im;
} Neon_vector;


/*
 * Reduces two 4-lane registers to a 2-lane result:
 * lane 0 = sum of all lanes of `re`, lane 1 = sum of all lanes of `im`.
 */
static inline float32x2_t neon_hsum_re_im( float32x4_t re, float32x4_t im )
{
  /* First pairwise pass: { x0 + x1, x2 + x3 } for each input. */
  float32x2_t re_pairs = vpadd_f32(vget_low_f32(re), vget_high_f32(re));
  float32x2_t im_pairs = vpadd_f32(vget_low_f32(im), vget_high_f32(im));

  /* Second pairwise pass folds each pair into a single lane. */
  return vpadd_f32(re_pairs, im_pairs);
}

/*
 * For each lane of the H and V groups this forms
 *     re = a_re * b_re + a_im * b_im
 *     im = a_re * b_im - a_im * b_re
 * (i.e. conj(a) * b, if the fields hold real/imaginary parts - the struct
 * itself does not enforce that interpretation).
 *
 * The re and im lanes are then summed over the 8 H lanes (low + high) and
 * over the 4 V lanes, giving (H_re, H_im) and (V_re, V_im).
 *
 * Returns (H_re^2 + H_im^2) * (V_re^2 + V_im^2).
 */
static inline float exampleFunction( const Neon_vector* restrict a, const Neon_vector* restrict b )
{
  /* ---- H group, low half ---- */
  float32x4_t lo_rr = vmulq_f32(a->H_re_low, b->H_re_low);
  float32x4_t lo_ii = vmulq_f32(a->H_im_low, b->H_im_low);
  float32x4_t lo_ri = vmulq_f32(a->H_re_low, b->H_im_low);
  float32x4_t lo_ir = vmulq_f32(a->H_im_low, b->H_re_low);
  float32x4_t h_lo_re = vaddq_f32(lo_rr, lo_ii);
  float32x4_t h_lo_im = vsubq_f32(lo_ri, lo_ir);

  /* ---- H group, high half ---- */
  float32x4_t hi_rr = vmulq_f32(a->H_re_high, b->H_re_high);
  float32x4_t hi_ii = vmulq_f32(a->H_im_high, b->H_im_high);
  float32x4_t hi_ri = vmulq_f32(a->H_re_high, b->H_im_high);
  float32x4_t hi_ir = vmulq_f32(a->H_im_high, b->H_re_high);
  float32x4_t h_hi_re = vaddq_f32(hi_rr, hi_ii);
  float32x4_t h_hi_im = vsubq_f32(hi_ri, hi_ir);

  /* Fold the two halves lane-wise, then reduce to { sum_re, sum_im }. */
  float32x2_t h_sum = neon_hsum_re_im(vaddq_f32(h_lo_re, h_hi_re),
                                      vaddq_f32(h_lo_im, h_hi_im));

  /* ---- V group ---- */
  float32x4_t v_rr = vmulq_f32(a->V_re, b->V_re);
  float32x4_t v_ii = vmulq_f32(a->V_im, b->V_im);
  float32x4_t v_ri = vmulq_f32(a->V_re, b->V_im);
  float32x4_t v_ir = vmulq_f32(a->V_im, b->V_re);
  float32x2_t v_sum = neon_hsum_re_im(vaddq_f32(v_rr, v_ii),
                                      vsubq_f32(v_ri, v_ir));

  /* Square all four sums at once: { H_re^2, H_im^2, V_re^2, V_im^2 }. */
  float32x4_t sums    = vcombine_f32(h_sum, v_sum);
  float32x4_t squares = vmulq_f32(sums, sums);

  /* Pairwise add yields { |H|^2, |V|^2 }. */
  float32x2_t norms = vpadd_f32(vget_low_f32(squares), vget_high_f32(squares));

  float h_norm = vget_lane_f32(norms, 0);
  float v_norm = vget_lane_f32(norms, 1);
  return h_norm * v_norm;
}