diff --git a/src/runner_iact.h b/src/runner_iact.h
index c9338c9e2adf3d2cc576629a02819d0a393d9d69..b7b237aa38509eb13072884881a806041a8e3ef3 100644
--- a/src/runner_iact.h
+++ b/src/runner_iact.h
@@ -66,39 +66,33 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
     for ( k = 0 ; k < 3 ; k++ )
         curlvr[k] *= ri;
             
-    if ( r2 < hi*hi*kernel_gamma2 ) {
-        
-        h_inv = 1.0 / hi;
-        xi = r * h_inv;
-        kernel_deval( xi , &wi , &wi_dx );
-        
-        pi->rho += mj * wi;
-        pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx );
-        pi->density.wcount += wi;
-        pi->density.wcount_dh -= xi * wi_dx;
-
-	    pi->density.div_v += mj * dvdr * wi_dx;
-	    for ( k = 0 ; k < 3 ; k++ )
-	        pi->density.curl_v[k] += mj * curlvr[k] * wi_dx;
-            
-        }
+    /* Compute density of pi. */
+    h_inv = 1.0 / hi;
+    xi = r * h_inv;
+    kernel_deval( xi , &wi , &wi_dx );
 
-    if ( r2 < hj*hj*kernel_gamma2 ) {
-        
-        h_inv = 1.0 / hj;
-        xj = r * h_inv;
-        kernel_deval( xj , &wj , &wj_dx );
-        
-        pj->rho += mi * wj;
-        pj->rho_dh -= mi * ( 3.0*wj + xj*wj_dx );
-        pj->density.wcount += wj;
-        pj->density.wcount_dh -= xj * wj_dx;
-        
-	    pj->density.div_v += mi * dvdr * wj_dx;
-	    for ( k = 0 ; k < 3 ; k++ )
-	        pj->density.curl_v[k] += mi * curlvr[k] * wj_dx;
-            
-        }
+    pi->rho += mj * wi;
+    pi->rho_dh -= mj * ( 3.0*wi + xi*wi_dx );
+    pi->density.wcount += wi;
+    pi->density.wcount_dh -= xi * wi_dx;
+
+	pi->density.div_v += mj * dvdr * wi_dx;
+	for ( k = 0 ; k < 3 ; k++ )
+	    pi->density.curl_v[k] += mj * curlvr[k] * wi_dx;
+
+    /* Compute density of pj. */
+    h_inv = 1.0 / hj;
+    xj = r * h_inv;
+    kernel_deval( xj , &wj , &wj_dx );
+
+    pj->rho += mi * wj;
+    pj->rho_dh -= mi * ( 3.0*wj + xj*wj_dx );
+    pj->density.wcount += wj;
+    pj->density.wcount_dh -= xj * wj_dx;
+
+	pj->density.div_v += mi * dvdr * wj_dx;
+	for ( k = 0 ; k < 3 ; k++ )
+	    pj->density.curl_v[k] += mi * curlvr[k] * wj_dx;
         
     }
     
@@ -121,23 +115,23 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_density ( flo
     r.v = vec_sqrt( vec_load( R2 ) );
     ri.v = vec_rcp( r.v );
     #if VEC_SIZE==8
-        mi.v = _mm256_set_ps( pi[7]->mass , pi[6]->mass , pi[5]->mass , pi[4]->mass , pi[3]->mass , pi[2]->mass , pi[1]->mass , pi[0]->mass );
-        mj.v = _mm256_set_ps( pj[7]->mass , pj[6]->mass , pj[5]->mass , pj[4]->mass , pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
+        mi.v = vec_set( pi[7]->mass , pi[6]->mass , pi[5]->mass , pi[4]->mass , pi[3]->mass , pi[2]->mass , pi[1]->mass , pi[0]->mass );
+        mj.v = vec_set( pj[7]->mass , pj[6]->mass , pj[5]->mass , pj[4]->mass , pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm256_set_ps( pi[7]->v[k] , pi[6]->v[k] , pi[5]->v[k] , pi[4]->v[k] , pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm256_set_ps( pj[7]->v[k] , pj[6]->v[k] , pj[5]->v[k] , pj[4]->v[k] , pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[7]->v[k] , pi[6]->v[k] , pi[5]->v[k] , pi[4]->v[k] , pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
+            vj[k].v = vec_set( pj[7]->v[k] , pj[6]->v[k] , pj[5]->v[k] , pj[4]->v[k] , pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm256_set_ps( Dx[21+k] , Dx[18+k] , Dx[15+k] , Dx[12+k] , Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
     #elif VEC_SIZE==4
-        mi.v = _mm_set_ps( pi[3]->mass , pi[2]->mass , pi[1]->mass , pi[0]->mass );
-        mj.v = _mm_set_ps( pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
+        mi.v = vec_set( pi[0]->mass , pi[1]->mass , pi[2]->mass , pi[3]->mass );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm_set_ps( pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm_set_ps( pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
     #endif
 
     hi.v = vec_load( Hi );
@@ -285,21 +279,21 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_densit
     r.v = vec_sqrt( vec_load( R2 ) );
     ri.v = vec_rcp( r.v );
     #if VEC_SIZE==8
-        mj.v = _mm256_set_ps( pj[7]->mass , pj[6]->mass , pj[5]->mass , pj[4]->mass , pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass , pj[4]->mass , pj[5]->mass , pj[6]->mass , pj[7]->mass );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm256_set_ps( pi[7]->v[k] , pi[6]->v[k] , pi[5]->v[k] , pi[4]->v[k] , pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm256_set_ps( pj[7]->v[k] , pj[6]->v[k] , pj[5]->v[k] , pj[4]->v[k] , pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] , pi[4]->v[k] , pi[5]->v[k] , pi[6]->v[k] , pi[7]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] , pj[4]->v[k] , pj[5]->v[k] , pj[6]->v[k] , pj[7]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm256_set_ps( Dx[21+k] , Dx[18+k] , Dx[15+k] , Dx[12+k] , Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
     #elif VEC_SIZE==4
-        mj.v = _mm_set_ps( pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm_set_ps( pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm_set_ps( pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
     #endif
     
     hi.v = vec_load( Hi );
@@ -459,43 +453,43 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_vec_force ( float
 
     /* Load stuff. */
     #if VEC_SIZE==8
-        mi.v = _mm256_set_ps( pi[7]->mass , pi[6]->mass , pi[5]->mass , pi[4]->mass , pi[3]->mass , pi[2]->mass , pi[1]->mass , pi[0]->mass );
-        mj.v = _mm256_set_ps( pj[7]->mass , pj[6]->mass , pj[5]->mass , pj[4]->mass , pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
-        piPOrho2.v = _mm256_set_ps( pi[7]->force.POrho2 , pi[6]->force.POrho2 , pi[5]->force.POrho2 , pi[4]->force.POrho2 , pi[3]->force.POrho2 , pi[2]->force.POrho2 , pi[1]->force.POrho2 , pi[0]->force.POrho2 );
-        pjPOrho2.v = _mm256_set_ps( pj[7]->force.POrho2 , pj[6]->force.POrho2 , pj[5]->force.POrho2 , pj[4]->force.POrho2 , pj[3]->force.POrho2 , pj[2]->force.POrho2 , pj[1]->force.POrho2 , pj[0]->force.POrho2 );
-        pirho.v = _mm256_set_ps( pi[7]->rho , pi[6]->rho , pi[5]->rho , pi[4]->rho , pi[3]->rho , pi[2]->rho , pi[1]->rho , pi[0]->rho );
-        pjrho.v = _mm256_set_ps( pj[7]->rho , pj[6]->rho , pj[5]->rho , pj[4]->rho , pj[3]->rho , pj[2]->rho , pj[1]->rho , pj[0]->rho );
-        ci.v = _mm256_set_ps( pi[7]->force.c , pi[6]->force.c , pi[5]->force.c , pi[4]->force.c , pi[3]->force.c , pi[2]->force.c , pi[1]->force.c , pi[0]->force.c );
-        cj.v = _mm256_set_ps( pj[7]->force.c , pj[6]->force.c , pj[5]->force.c , pj[4]->force.c , pj[3]->force.c , pj[2]->force.c , pj[1]->force.c , pj[0]->force.c );
-        vi_sig.v = _mm256_set_ps( pi[7]->force.v_sig , pi[6]->force.v_sig , pi[5]->force.v_sig , pi[4]->force.v_sig , pi[3]->force.v_sig , pi[2]->force.v_sig , pi[1]->force.v_sig , pi[0]->force.v_sig );
-        vj_sig.v = _mm256_set_ps( pj[7]->force.v_sig , pj[6]->force.v_sig , pj[5]->force.v_sig , pj[4]->force.v_sig , pj[3]->force.v_sig , pj[2]->force.v_sig , pj[1]->force.v_sig , pj[0]->force.v_sig );
+        mi.v = vec_set( pi[0]->mass , pi[1]->mass , pi[2]->mass , pi[3]->mass , pi[4]->mass , pi[5]->mass , pi[6]->mass , pi[7]->mass );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass , pj[4]->mass , pj[5]->mass , pj[6]->mass , pj[7]->mass );
+        piPOrho2.v = vec_set( pi[0]->force.POrho2 , pi[1]->force.POrho2 , pi[2]->force.POrho2 , pi[3]->force.POrho2 , pi[4]->force.POrho2 , pi[5]->force.POrho2 , pi[6]->force.POrho2 , pi[7]->force.POrho2 );
+        pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 , pj[4]->force.POrho2 , pj[5]->force.POrho2 , pj[6]->force.POrho2 , pj[7]->force.POrho2 );
+        pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho , pi[4]->rho , pi[5]->rho , pi[6]->rho , pi[7]->rho );
+        pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho , pj[4]->rho , pj[5]->rho , pj[6]->rho , pj[7]->rho );
+        ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c , pi[4]->force.c , pi[5]->force.c , pi[6]->force.c , pi[7]->force.c );
+        cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c , pj[4]->force.c , pj[5]->force.c , pj[6]->force.c , pj[7]->force.c );
+        vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig , pi[4]->force.v_sig , pi[5]->force.v_sig , pi[6]->force.v_sig , pi[7]->force.v_sig );
+        vj_sig.v = vec_set( pj[0]->force.v_sig , pj[1]->force.v_sig , pj[2]->force.v_sig , pj[3]->force.v_sig , pj[4]->force.v_sig , pj[5]->force.v_sig , pj[6]->force.v_sig , pj[7]->force.v_sig );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm256_set_ps( pi[7]->v[k] , pi[6]->v[k] , pi[5]->v[k] , pi[4]->v[k] , pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm256_set_ps( pj[7]->v[k] , pj[6]->v[k] , pj[5]->v[k] , pj[4]->v[k] , pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] , pi[4]->v[k] , pi[5]->v[k] , pi[6]->v[k] , pi[7]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] , pj[4]->v[k] , pj[5]->v[k] , pj[6]->v[k] , pj[7]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm256_set_ps( Dx[21+k] , Dx[18+k] , Dx[15+k] , Dx[12+k] , Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
-        balsara.v = _mm256_set_ps( pi[7]->force.balsara , pi[6]->force.balsara , pi[5]->force.balsara , pi[4]->force.balsara , pi[3]->force.balsara , pi[2]->force.balsara , pi[1]->force.balsara , pi[0]->force.balsara ) +
-                    _mm256_set_ps( pj[7]->force.balsara , pj[6]->force.balsara , pj[5]->force.balsara , pj[4]->force.balsara , pj[3]->force.balsara , pj[2]->force.balsara , pj[1]->force.balsara , pj[0]->force.balsara );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
+        balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara , pi[4]->force.balsara , pi[5]->force.balsara , pi[6]->force.balsara , pi[7]->force.balsara ) +
+                    vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara , pj[4]->force.balsara , pj[5]->force.balsara , pj[6]->force.balsara , pj[7]->force.balsara );
     #elif VEC_SIZE==4
-        mi.v = _mm_set_ps( pi[3]->mass , pi[2]->mass , pi[1]->mass , pi[0]->mass );
-        mj.v = _mm_set_ps( pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
-        piPOrho2.v = _mm_set_ps( pi[3]->force.POrho2 , pi[2]->force.POrho2 , pi[1]->force.POrho2 , pi[0]->force.POrho2 );
-        pjPOrho2.v = _mm_set_ps( pj[3]->force.POrho2 , pj[2]->force.POrho2 , pj[1]->force.POrho2 , pj[0]->force.POrho2 );
-        pirho.v = _mm_set_ps( pi[3]->rho , pi[2]->rho , pi[1]->rho , pi[0]->rho );
-        pjrho.v = _mm_set_ps( pj[3]->rho , pj[2]->rho , pj[1]->rho , pj[0]->rho );
-        ci.v = _mm_set_ps( pi[3]->force.c , pi[2]->force.c , pi[1]->force.c , pi[0]->force.c );
-        cj.v = _mm_set_ps( pj[3]->force.c , pj[2]->force.c , pj[1]->force.c , pj[0]->force.c );
-        vi_sig.v = _mm_set_ps( pi[3]->force.v_sig , pi[2]->force.v_sig , pi[1]->force.v_sig , pi[0]->force.v_sig );
-        vj_sig.v = _mm_set_ps( pj[3]->force.v_sig , pj[2]->force.v_sig , pj[1]->force.v_sig , pj[0]->force.v_sig );
+        mi.v = vec_set( pi[0]->mass , pi[1]->mass , pi[2]->mass , pi[3]->mass );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
+        piPOrho2.v = vec_set( pi[0]->force.POrho2 , pi[1]->force.POrho2 , pi[2]->force.POrho2 , pi[3]->force.POrho2 );
+        pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 );
+        pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho );
+        pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho );
+        ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c );
+        cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c );
+        vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig );
+        vj_sig.v = vec_set( pj[0]->force.v_sig , pj[1]->force.v_sig , pj[2]->force.v_sig , pj[3]->force.v_sig );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm_set_ps( pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm_set_ps( pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
-        balsara.v = _mm_set_ps( pi[3]->force.balsara , pi[2]->force.balsara , pi[1]->force.balsara , pi[0]->force.balsara ) +
-                    _mm_set_ps( pj[3]->force.balsara , pj[2]->force.balsara , pj[1]->force.balsara , pj[0]->force.balsara );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
+        balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara ) +
+                    vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara );
     #else
         #error
     #endif
@@ -688,41 +682,41 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_vec_force
 
     /* Load stuff. */
     #if VEC_SIZE==8
-        mj.v = _mm256_set_ps( pj[7]->mass , pj[6]->mass , pj[5]->mass , pj[4]->mass , pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
-        piPOrho2.v = _mm256_set_ps( pi[7]->force.POrho2 , pi[6]->force.POrho2 , pi[5]->force.POrho2 , pi[4]->force.POrho2 , pi[3]->force.POrho2 , pi[2]->force.POrho2 , pi[1]->force.POrho2 , pi[0]->force.POrho2 );
-        pjPOrho2.v = _mm256_set_ps( pj[7]->force.POrho2 , pj[6]->force.POrho2 , pj[5]->force.POrho2 , pj[4]->force.POrho2 , pj[3]->force.POrho2 , pj[2]->force.POrho2 , pj[1]->force.POrho2 , pj[0]->force.POrho2 );
-        pirho.v = _mm256_set_ps( pi[7]->rho , pi[6]->rho , pi[5]->rho , pi[4]->rho , pi[3]->rho , pi[2]->rho , pi[1]->rho , pi[0]->rho );
-        pjrho.v = _mm256_set_ps( pj[7]->rho , pj[6]->rho , pj[5]->rho , pj[4]->rho , pj[3]->rho , pj[2]->rho , pj[1]->rho , pj[0]->rho );
-        ci.v = _mm256_set_ps( pi[7]->force.c , pi[6]->force.c , pi[5]->force.c , pi[4]->force.c , pi[3]->force.c , pi[2]->force.c , pi[1]->force.c , pi[0]->force.c );
-        cj.v = _mm256_set_ps( pj[7]->force.c , pj[6]->force.c , pj[5]->force.c , pj[4]->force.c , pj[3]->force.c , pj[2]->force.c , pj[1]->force.c , pj[0]->force.c );
-        vi_sig.v = _mm256_set_ps( pi[7]->force.v_sig , pi[6]->force.v_sig , pi[5]->force.v_sig , pi[4]->force.v_sig , pi[3]->force.v_sig , pi[2]->force.v_sig , pi[1]->force.v_sig , pi[0]->force.v_sig );
-        vj_sig.v = _mm256_set_ps( pj[7]->force.v_sig , pj[6]->force.v_sig , pj[5]->force.v_sig , pj[4]->force.v_sig , pj[3]->force.v_sig , pj[2]->force.v_sig , pj[1]->force.v_sig , pj[0]->force.v_sig );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass , pj[4]->mass , pj[5]->mass , pj[6]->mass , pj[7]->mass );
+        piPOrho2.v = vec_set( pi[0]->force.POrho2 , pi[1]->force.POrho2 , pi[2]->force.POrho2 , pi[3]->force.POrho2 , pi[4]->force.POrho2 , pi[5]->force.POrho2 , pi[6]->force.POrho2 , pi[7]->force.POrho2 );
+        pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 , pj[4]->force.POrho2 , pj[5]->force.POrho2 , pj[6]->force.POrho2 , pj[7]->force.POrho2 );
+        pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho , pi[4]->rho , pi[5]->rho , pi[6]->rho , pi[7]->rho );
+        pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho , pj[4]->rho , pj[5]->rho , pj[6]->rho , pj[7]->rho );
+        ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c , pi[4]->force.c , pi[5]->force.c , pi[6]->force.c , pi[7]->force.c );
+        cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c , pj[4]->force.c , pj[5]->force.c , pj[6]->force.c , pj[7]->force.c );
+        vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig , pi[4]->force.v_sig , pi[5]->force.v_sig , pi[6]->force.v_sig , pi[7]->force.v_sig );
+        vj_sig.v = vec_set( pj[0]->force.v_sig , pj[1]->force.v_sig , pj[2]->force.v_sig , pj[3]->force.v_sig , pj[4]->force.v_sig , pj[5]->force.v_sig , pj[6]->force.v_sig , pj[7]->force.v_sig );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm256_set_ps( pi[7]->v[k] , pi[6]->v[k] , pi[5]->v[k] , pi[4]->v[k] , pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm256_set_ps( pj[7]->v[k] , pj[6]->v[k] , pj[5]->v[k] , pj[4]->v[k] , pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] , pi[4]->v[k] , pi[5]->v[k] , pi[6]->v[k] , pi[7]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] , pj[4]->v[k] , pj[5]->v[k] , pj[6]->v[k] , pj[7]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm256_set_ps( Dx[21+k] , Dx[18+k] , Dx[15+k] , Dx[12+k] , Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
-        balsara.v = _mm256_set_ps( pi[7]->force.balsara , pi[6]->force.balsara , pi[5]->force.balsara , pi[4]->force.balsara , pi[3]->force.balsara , pi[2]->force.balsara , pi[1]->force.balsara , pi[0]->force.balsara ) +
-                    _mm256_set_ps( pj[7]->force.balsara , pj[6]->force.balsara , pj[5]->force.balsara , pj[4]->force.balsara , pj[3]->force.balsara , pj[2]->force.balsara , pj[1]->force.balsara , pj[0]->force.balsara );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] , Dx[12+k] , Dx[15+k] , Dx[18+k] , Dx[21+k] );
+        balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara , pi[4]->force.balsara , pi[5]->force.balsara , pi[6]->force.balsara , pi[7]->force.balsara ) +
+                    vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara , pj[4]->force.balsara , pj[5]->force.balsara , pj[6]->force.balsara , pj[7]->force.balsara );
     #elif VEC_SIZE==4
-        mj.v = _mm_set_ps( pj[3]->mass , pj[2]->mass , pj[1]->mass , pj[0]->mass );
-        piPOrho2.v = _mm_set_ps( pi[3]->force.POrho2 , pi[2]->force.POrho2 , pi[1]->force.POrho2 , pi[0]->force.POrho2 );
-        pjPOrho2.v = _mm_set_ps( pj[3]->force.POrho2 , pj[2]->force.POrho2 , pj[1]->force.POrho2 , pj[0]->force.POrho2 );
-        pirho.v = _mm_set_ps( pi[3]->rho , pi[2]->rho , pi[1]->rho , pi[0]->rho );
-        pjrho.v = _mm_set_ps( pj[3]->rho , pj[2]->rho , pj[1]->rho , pj[0]->rho );
-        ci.v = _mm_set_ps( pi[3]->force.c , pi[2]->force.c , pi[1]->force.c , pi[0]->force.c );
-        cj.v = _mm_set_ps( pj[3]->force.c , pj[2]->force.c , pj[1]->force.c , pj[0]->force.c );
-        vi_sig.v = _mm_set_ps( pi[3]->force.v_sig , pi[2]->force.v_sig , pi[1]->force.v_sig , pi[0]->force.v_sig );
-        vj_sig.v = _mm_set_ps( pj[3]->force.v_sig , pj[2]->force.v_sig , pj[1]->force.v_sig , pj[0]->force.v_sig );
+        mj.v = vec_set( pj[0]->mass , pj[1]->mass , pj[2]->mass , pj[3]->mass );
+        piPOrho2.v = vec_set( pi[0]->force.POrho2 , pi[1]->force.POrho2 , pi[2]->force.POrho2 , pi[3]->force.POrho2 );
+        pjPOrho2.v = vec_set( pj[0]->force.POrho2 , pj[1]->force.POrho2 , pj[2]->force.POrho2 , pj[3]->force.POrho2 );
+        pirho.v = vec_set( pi[0]->rho , pi[1]->rho , pi[2]->rho , pi[3]->rho );
+        pjrho.v = vec_set( pj[0]->rho , pj[1]->rho , pj[2]->rho , pj[3]->rho );
+        ci.v = vec_set( pi[0]->force.c , pi[1]->force.c , pi[2]->force.c , pi[3]->force.c );
+        cj.v = vec_set( pj[0]->force.c , pj[1]->force.c , pj[2]->force.c , pj[3]->force.c );
+        vi_sig.v = vec_set( pi[0]->force.v_sig , pi[1]->force.v_sig , pi[2]->force.v_sig , pi[3]->force.v_sig );
+        vj_sig.v = vec_set( pj[0]->force.v_sig , pj[1]->force.v_sig , pj[2]->force.v_sig , pj[3]->force.v_sig );
         for ( k = 0 ; k < 3 ; k++ ) {
-            vi[k].v = _mm_set_ps( pi[3]->v[k] , pi[2]->v[k] , pi[1]->v[k] , pi[0]->v[k] );
-            vj[k].v = _mm_set_ps( pj[3]->v[k] , pj[2]->v[k] , pj[1]->v[k] , pj[0]->v[k] );
+            vi[k].v = vec_set( pi[0]->v[k] , pi[1]->v[k] , pi[2]->v[k] , pi[3]->v[k] );
+            vj[k].v = vec_set( pj[0]->v[k] , pj[1]->v[k] , pj[2]->v[k] , pj[3]->v[k] );
             }
         for ( k = 0 ; k < 3 ; k++ )
-            dx[k].v = _mm_set_ps( Dx[9+k] , Dx[6+k] , Dx[3+k] , Dx[0+k] );
-        balsara.v = _mm_set_ps( pi[3]->force.balsara , pi[2]->force.balsara , pi[1]->force.balsara , pi[0]->force.balsara ) +
-                    _mm_set_ps( pj[3]->force.balsara , pj[2]->force.balsara , pj[1]->force.balsara , pj[0]->force.balsara );
+            dx[k].v = vec_set( Dx[0+k] , Dx[3+k] , Dx[6+k] , Dx[9+k] );
+        balsara.v = vec_set( pi[0]->force.balsara , pi[1]->force.balsara , pi[2]->force.balsara , pi[3]->force.balsara ) +
+                    vec_set( pj[0]->force.balsara , pj[1]->force.balsara , pj[2]->force.balsara , pj[3]->force.balsara );
     #else
         #error
     #endif