diff --git a/src/cell.c b/src/cell.c
index 3ee5bf583215bffc2aa0a0e4b7a516fda6371cc4..abdeda8a3b0844567fb19dacb9f9a83a65cd6a20 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -167,7 +167,7 @@ void cell_unlocktree( struct cell *c ) {
  
 void cell_split ( struct cell *c  ) {
 
-    int i, j, k, kk;
+    int i, j, k;
     struct part temp, *parts = c->parts;
     int left[8], right[8];
     double pivot[3];
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index eeed316f0fe89ae5b419fbfed9bd2c5694ff7331..896aa2dc75f7b862415d240bee238464fd5c3f58 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -940,6 +940,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                 /* Get a pointer to the jth particle. */
                 cpj = &cparts_j[ sortdt_j[pjd].i ];
+                hj = cpj->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -953,7 +954,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                     #ifndef VECTORIZE
 
-                        IACT_NONSYM( r2 , dx , cpj->h , hi , &parts_j[ sortdt_j[pjd].i ] , pi );
+                        IACT_NONSYM( r2 , dx , hj , hi , &parts_j[ sortdt_j[pjd].i ] , pi );
 
                     #else
 
@@ -962,7 +963,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                         dxq1[3*icount1+0] = dx[0];
                         dxq1[3*icount1+1] = dx[1];
                         dxq1[3*icount1+2] = dx[2];
-                        hiq1[icount1] = cpj->h;
+                        hiq1[icount1] = hj;
                         hjq1[icount1] = hi;
                         piq1[icount1] = &parts_j[ sortdt_j[ pjd ].i ];
                         pjq1[icount1] = pi;
@@ -990,6 +991,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                 /* Get a pointer to the jth particle. */
                 cpj = &cparts_j[ sort_j[pjd].i ];
+                hj = cpj->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -1005,9 +1007,9 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                         /* Does pj need to be updated too? */
                         if ( cpj->dt <= dt_step )
-                            IACT( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
+                            IACT( r2 , dx , hi , hj , pi , &parts_j[ sort_j[pjd].i ] );
                         else
-                            IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[pjd].i ] );
+                            IACT_NONSYM( r2 , dx , hi , hj , pi , &parts_j[ sort_j[pjd].i ] );
 
                     #else
 
@@ -1020,7 +1022,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                             dxq2[3*icount2+1] = dx[1];
                             dxq2[3*icount2+2] = dx[2];
                             hiq2[icount2] = hi;
-                            hjq2[icount2] = cpj->h;
+                            hjq2[icount2] = hj;
                             piq2[icount2] = pi;
                             pjq2[icount2] = &parts_j[ sort_j[ pjd ].i ];
                             icount2 += 1;
@@ -1041,7 +1043,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                             dxq1[3*icount1+1] = dx[1];
                             dxq1[3*icount1+2] = dx[2];
                             hiq1[icount1] = hi;
-                            hjq1[icount1] = cpj->h;
+                            hjq1[icount1] = hj;
                             piq1[icount1] = pi;
                             pjq1[icount1] = &parts_j[ sort_j[ pjd ].i ];
                             icount1 += 1;
@@ -1090,6 +1092,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                 /* Get a pointer to the jth particle. */
                 cpi = &cparts_i[ sortdt_i[pid].i ];
+                hi = cpi->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -1099,11 +1102,11 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                     }
 
                 /* Hit or miss? */
-                if ( r2 < hjg2 && r2 > cpi->h*cpi->h*kernel_gamma2 ) {
+                if ( r2 < hjg2 && r2 > hi*hi*kernel_gamma2 ) {
 
                     #ifndef VECTORIZE
 
-                        IACT_NONSYM( r2 , dx , cpi->h , hj , &parts_i[ sortdt_i[pid].i ] , pj );
+                        IACT_NONSYM( r2 , dx , hi , hj , &parts_i[ sortdt_i[pid].i ] , pj );
 
                     #else
 
@@ -1112,7 +1115,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                         dxq1[3*icount1+0] = dx[0];
                         dxq1[3*icount1+1] = dx[1];
                         dxq1[3*icount1+2] = dx[2];
-                        hiq1[icount1] = cpi->h;
+                        hiq1[icount1] = hi;
                         hjq1[icount1] = hj;
                         piq1[icount1] = &parts_i[ sortdt_i[ pid ].i ];
                         pjq1[icount1] = pj;
@@ -1139,6 +1142,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
 
                 /* Get a pointer to the jth particle. */
                 cpi = &cparts_i[ sort_i[pid].i ];
+                hi = cpi->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -1148,15 +1152,15 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                     }
 
                 /* Hit or miss? */
-                if ( r2 < hjg2 && r2 > cpi->h*cpi->h*kernel_gamma2 ) {
+                if ( r2 < hjg2 && r2 > hi*hi*kernel_gamma2 ) {
 
                     #ifndef VECTORIZE
 
                         /* Does pi need to be updated too? */
                         if ( cpi->dt <= dt_step )
-                            IACT( r2 , dx , hj , cpi->h , pj , &parts_i[ sort_i[pid].i ] );
+                            IACT( r2 , dx , hj , hi , pj , &parts_i[ sort_i[pid].i ] );
                         else
-                            IACT_NONSYM( r2 , dx , hj , cpi->h , pj , &parts_i[ sort_i[pid].i ] );
+                            IACT_NONSYM( r2 , dx , hj , hi , pj , &parts_i[ sort_i[pid].i ] );
 
                     #else
 
@@ -1169,7 +1173,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                             dxq2[3*icount2+1] = dx[1];
                             dxq2[3*icount2+2] = dx[2];
                             hiq2[icount2] = hj;
-                            hjq2[icount2] = cpi->h;
+                            hjq2[icount2] = hi;
                             piq2[icount2] = pj;
                             pjq2[icount2] = &parts_i[ sort_i[ pid ].i ];
                             icount2 += 1;
@@ -1190,7 +1194,7 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
                             dxq1[3*icount1+1] = dx[1];
                             dxq1[3*icount1+2] = dx[2];
                             hiq1[icount1] = hj;
-                            hjq1[icount1] = cpi->h;
+                            hjq1[icount1] = hi;
                             piq1[icount1] = pj;
                             pjq1[icount1] = &parts_i[ sort_i[ pid ].i ];
                             icount1 += 1;
@@ -1243,97 +1247,226 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) {
 
     int k, pid, pjd, count = c->count;
     double pix[3];
-    float dx[3], hi, hig2, r2;
+    float dx[3], hi, hj, hig2, r2;
     struct part *restrict parts = c->parts, *restrict pi;
     struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
     float dt_step = r->e->dt_step;
+    int firstdt = 0, countdt = 0, *indt = NULL, doj;
     #ifdef VECTORIZE
-        int icount = 0;
-        float r2q[VEC_SIZE] __attribute__ ((aligned (16)));
-        float hiq[VEC_SIZE] __attribute__ ((aligned (16)));
-        float hjq[VEC_SIZE] __attribute__ ((aligned (16)));
-        float dxq[3*VEC_SIZE] __attribute__ ((aligned (16)));
-        struct part *piq[VEC_SIZE], *pjq[VEC_SIZE];
+        int icount1 = 0;
+        float r2q1[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hiq1[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hjq1[VEC_SIZE] __attribute__ ((aligned (16)));
+        float dxq1[3*VEC_SIZE] __attribute__ ((aligned (16)));
+        struct part *piq1[VEC_SIZE], *pjq1[VEC_SIZE];
+        int icount2 = 0;
+        float r2q2[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hiq2[VEC_SIZE] __attribute__ ((aligned (16)));
+        float hjq2[VEC_SIZE] __attribute__ ((aligned (16)));
+        float dxq2[3*VEC_SIZE] __attribute__ ((aligned (16)));
+        struct part *piq2[VEC_SIZE], *pjq2[VEC_SIZE];
     #endif
     TIMER_TIC
     
-    /* Any work here? */
+    /* Set up indt if needed. */
     if ( c->dt_min > dt_step )
         return;
+    else if ( c->dt_max > dt_step ) {
+        if ( ( indt = (int *)alloca( sizeof(int) * count ) ) == NULL )
+            error( "Failed to allocate indt." );
+        for ( k = 0 ; k < count ; k++ )
+            if ( cparts[k].dt <= dt_step ) {
+                indt[ countdt ] = k;
+                countdt += 1;
+                }
+        }
     
     /* Loop over the particles in the cell. */
     for ( pid = 0 ; pid < count ; pid++ ) {
     
         /* Get a pointer to the ith particle. */
-        cpi = &cparts[pid];
         pi = &parts[pid];
-        if ( cpi->dt > dt_step )
-            continue;
-            
+        cpi = &cparts[pid];
+    
         /* Get the particle position and radius. */
         for ( k = 0 ; k < 3 ; k++ )
             pix[k] = cpi->x[k];
         hi = cpi->h;
         hig2 = hi * hi * kernel_gamma2;
-            
-        /* Loop over the other particles .*/
-        for ( pjd = 0 ; pjd < count ; pjd++ ) {
         
-            /* Skip self-interactions. */
-            if ( pid == pjd )
-                continue;
-        
-            /* Get a pointer to the jth particle. */
-            cpj = &cparts[pjd];                
+        /* Is the ith particle inactive? */
+        if ( cpi->dt > dt_step ) {
         
-            /* Compute the pairwise distance. */
-            r2 = 0.0f;
-            for ( k = 0 ; k < 3 ; k++ ) {
-                dx[k] = pix[k] - cpj->x[k];
-                r2 += dx[k]*dx[k];
-                }
+            /* Loop over the other particles .*/
+            for ( pjd = firstdt ; pjd < countdt ; pjd++ ) {
+
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts[ indt[ pjd ] ];
+                hj = cpj->h;
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = cpj->x[k] - pix[k];
+                    r2 += dx[k]*dx[k];
+                    }
+
+                /* Hit or miss? */
+                if ( r2 < hj*hj*kernel_gamma2 ) {
+
+                    #ifndef VECTORIZE
+
+                        IACT_NONSYM( r2 , dx , hj , hi , &parts[indt[pjd]] , pi );
+
+                    #else
+
+                        /* Add this interaction to the queue. */
+                        r2q1[icount1] = r2;
+                        dxq1[3*icount1+0] = dx[0];
+                        dxq1[3*icount1+1] = dx[1];
+                        dxq1[3*icount1+2] = dx[2];
+                        hiq1[icount1] = cpj->h;
+                        hjq1[icount1] = hi;
+                        piq1[icount1] = &parts[indt[pjd]];
+                        pjq1[icount1] = pi;
+                        icount1 += 1;
+
+                        /* Flush? */
+                        if ( icount1 == VEC_SIZE ) {
+                            IACT_NONSYM_VEC( r2q1 , dxq1 , hiq1 , hjq1 , piq1 , pjq1 );
+                            icount1 = 0;
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over all other particles. */
                 
-            /* Hit or miss? */
-            if ( r2 < hig2 ) {
+            }
             
-                #ifndef VECTORIZE
+        /* Otherwise, interact with all candidates. */
+        else {
+        
+            /* We caught a live one! */
+            firstdt += 1;
+            
+            /* Loop over the other particles .*/
+            for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
+
+                /* Get a pointer to the jth particle. */
+                cpj = &cparts[pjd];
+                hj = cpj->h;
+
+                /* Compute the pairwise distance. */
+                r2 = 0.0f;
+                for ( k = 0 ; k < 3 ; k++ ) {
+                    dx[k] = pix[k] - cpj->x[k];
+                    r2 += dx[k]*dx[k];
+                    }
+                doj = ( cpj->dt <= dt_step ) && ( r2 < hj*hj*kernel_gamma2 );
+
+                /* Hit or miss? */
+                if ( r2 < hig2 || doj ) {
+
+                    #ifndef VECTORIZE
+
+                        /* Which parts need to be updated? */
+                        if ( r2 < hig2 && doj )
+                            IACT( r2 , dx , hi , hj , pi , &parts[pjd] );
+                        else if ( !doj )
+                            IACT_NONSYM( r2 , dx , hi , hj , pi , &parts[pjd] );
+                        else {
+                            dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2];
+                            IACT_NONSYM( r2 , dx , hi , hj , pi , &parts[pjd] );
+                            }
+
+                    #else
+
+                        /* Does pj need to be updated too? */
+                        if ( r2 < hig2 && doj ) {
                         
-                    IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts[pjd] );
-                
-                #else
+                            /* Add this interaction to the symmetric queue. */
+                            r2q2[icount2] = r2;
+                            dxq2[3*icount2+0] = dx[0];
+                            dxq2[3*icount2+1] = dx[1];
+                            dxq2[3*icount2+2] = dx[2];
+                            hiq2[icount2] = hi;
+                            hjq2[icount2] = hj;
+                            piq2[icount2] = pi;
+                            pjq2[icount2] = &parts[pjd];
+                            icount2 += 1;
 
-                    /* Add this interaction to the queue. */
-                    r2q[icount] = r2;
-                    dxq[3*icount+0] = dx[0];
-                    dxq[3*icount+1] = dx[1];
-                    dxq[3*icount+2] = dx[2];
-                    hiq[icount] = hi;
-                    hjq[icount] = cpj->h;
-                    piq[icount] = pi;
-                    pjq[icount] = &parts[pjd];
-                    icount += 1;
+                            /* Flush? */
+                            if ( icount2 == VEC_SIZE ) {
+                                IACT_VEC( r2q2 , dxq2 , hiq2 , hjq2 , piq2 , pjq2 );
+                                icount2 = 0;
+                                }
+                                
+                            }
+                            
+                        else if ( !doj ) {
+                        
+                            /* Add this interaction to the non-symmetric queue. */
+                            r2q1[icount1] = r2;
+                            dxq1[3*icount1+0] = dx[0];
+                            dxq1[3*icount1+1] = dx[1];
+                            dxq1[3*icount1+2] = dx[2];
+                            hiq1[icount1] = hi;
+                            hjq1[icount1] = hj;
+                            piq1[icount1] = pi;
+                            pjq1[icount1] = &parts[pjd];
+                            icount1 += 1;
 
-                    /* Flush? */
-                    if ( icount == VEC_SIZE ) {
-                        IACT_NONSYM_VEC( r2q , dxq , hiq , hjq , piq , pjq );
-                        icount = 0;
-                        }
+                            /* Flush? */
+                            if ( icount1 == VEC_SIZE ) {
+                                IACT_NONSYM_VEC( r2q1 , dxq1 , hiq1 , hjq1 , piq1 , pjq1 );
+                                icount1 = 0;
+                                }
+                                
+                            }
 
-                #endif
-                
-                }
-        
-            } /* loop over all other particles. */
+                        else  {
+                        
+                            /* Add this interaction to the non-symmetric queue. */
+                            r2q1[icount1] = r2;
+                            dxq1[3*icount1+0] = -dx[0];
+                            dxq1[3*icount1+1] = -dx[1];
+                            dxq1[3*icount1+2] = -dx[2];
+                            hiq1[icount1] = hj;
+                            hjq1[icount1] = hi;
+                            piq1[icount1] = &parts[pjd];
+                            pjq1[icount1] = pi;
+                            icount1 += 1;
+
+                            /* Flush? */
+                            if ( icount1 == VEC_SIZE ) {
+                                IACT_NONSYM_VEC( r2q1 , dxq1 , hiq1 , hjq1 , piq1 , pjq1 );
+                                icount1 = 0;
+                                }
+                                
+                            }
+
+                    #endif
+
+                    }
+
+                } /* loop over all other particles. */
+            
+            }
     
         } /* loop over all particles. */
 
     #ifdef VECTORIZE
     /* Pick up any leftovers. */
-    if ( icount > 0 )
-        for ( k = 0 ; k < icount ; k++ )
-            IACT_NONSYM( r2q[k] , &dxq[3*k] , hiq[k] , hjq[k] , piq[k] , pjq[k] );
+    if ( icount1 > 0 )
+        for ( k = 0 ; k < icount1 ; k++ )
+            IACT_NONSYM( r2q1[k] , &dxq1[3*k] , hiq1[k] , hjq1[k] , piq1[k] , pjq1[k] );
+    if ( icount2 > 0 )
+        for ( k = 0 ; k < icount2 ; k++ )
+            IACT( r2q2[k] , &dxq2[3*k] , hiq2[k] , hjq2[k] , piq2[k] , pjq2[k] );
     #endif
-        
+    
     #ifdef TIMER_VERBOSE
         printf( "runner_doself1[%02i]: %i parts at depth %i took %.3f ms.\n" , r->id , count , c->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
     #else
@@ -1347,7 +1480,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
 
     int k, pid, pjd, count = c->count;
     double pix[3];
-    float dx[3], hi, hig2, r2;
+    float dx[3], hi, hj, hig2, r2;
     struct part *restrict parts = c->parts, *restrict pi;
     struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
     float dt_step = r->e->dt_step;
@@ -1402,6 +1535,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
 
                 /* Get a pointer to the jth particle. */
                 cpj = &cparts[ indt[ pjd ] ];
+                hj = cpj->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -1411,11 +1545,11 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
                     }
 
                 /* Hit or miss? */
-                if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) {
+                if ( r2 < hig2 || r2 < hj*hj*kernel_gamma2 ) {
 
                     #ifndef VECTORIZE
 
-                        IACT_NONSYM( r2 , dx , cpj->h , hi , &parts[indt[pjd]] , pi );
+                        IACT_NONSYM( r2 , dx , hj , hi , &parts[indt[pjd]] , pi );
 
                     #else
 
@@ -1424,7 +1558,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
                         dxq1[3*icount1+0] = dx[0];
                         dxq1[3*icount1+1] = dx[1];
                         dxq1[3*icount1+2] = dx[2];
-                        hiq1[icount1] = cpj->h;
+                        hiq1[icount1] = hj;
                         hjq1[icount1] = hi;
                         piq1[icount1] = &parts[indt[pjd]];
                         pjq1[icount1] = pi;
@@ -1455,6 +1589,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
 
                 /* Get a pointer to the jth particle. */
                 cpj = &cparts[pjd];
+                hj = cpj->h;
 
                 /* Compute the pairwise distance. */
                 r2 = 0.0f;
@@ -1464,15 +1599,15 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
                     }
 
                 /* Hit or miss? */
-                if ( r2 < hig2 || r2 < cpj->h*cpj->h*kernel_gamma2 ) {
+                if ( r2 < hig2 || r2 < hj*hj*kernel_gamma2 ) {
 
                     #ifndef VECTORIZE
 
                         /* Does pj need to be updated too? */
                         if ( cpj->dt <= dt_step )
-                            IACT( r2 , dx , hi , cpj->h , pi , &parts[pjd] );
+                            IACT( r2 , dx , hi , hj , pi , &parts[pjd] );
                         else
-                            IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts[pjd] );
+                            IACT_NONSYM( r2 , dx , hi , hj , pi , &parts[pjd] );
 
                     #else
 
@@ -1485,7 +1620,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
                             dxq2[3*icount2+1] = dx[1];
                             dxq2[3*icount2+2] = dx[2];
                             hiq2[icount2] = hi;
-                            hjq2[icount2] = cpj->h;
+                            hjq2[icount2] = hj;
                             piq2[icount2] = pi;
                             pjq2[icount2] = &parts[pjd];
                             icount2 += 1;
@@ -1506,7 +1641,7 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
                             dxq1[3*icount1+1] = dx[1];
                             dxq1[3*icount1+2] = dx[2];
                             hiq1[icount1] = hi;
-                            hjq1[icount1] = cpj->h;
+                            hjq1[icount1] = hj;
                             piq1[icount1] = pi;
                             pjq1[icount1] = &parts[pjd];
                             icount1 += 1;
diff --git a/src/space.h b/src/space.h
index 43c9837751e029ea47b8549f354becd30a326145..9b9308c8b2a7e1dae49e5d22f8a1f94b9d51ac6a 100644
--- a/src/space.h
+++ b/src/space.h
@@ -26,7 +26,7 @@
 #define space_splitratio                0.875f
 #define space_splitsize_default         400
 #define space_subsize_default           5000
-#define space_dosub                     1
+#define space_dosub                     0
 #define space_stretch                   1.05f
 #define space_maxtaskspercell           31
 #define space_maxreldx                  0.2f
diff --git a/src/vector.h b/src/vector.h
index 65f0d30c67601e8d17d8347e508aacdd30fffcc2..39f1385e57b5fec3cfc30911c9ed900d6ba78524 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -27,7 +27,7 @@
     #define VEC_MACRO(elcount, type)  __attribute__((vector_size((elcount)*sizeof(type)))) type
 
     /* So what will the vector size be? */
-    #ifdef NO__AVX__
+    #ifdef __AVX__
         #define VECTORIZE
         #define VEC_SIZE 8
         #define VEC_FLOAT __m256
@@ -54,7 +54,7 @@
         #define vec_dbl_ftoi(a) _mm256_cvttpd_epi32(a)
         #define vec_dbl_fmin(a,b) _mm256_min_pd(a,b)
         #define vec_dbl_fmax(a,b) _mm256_max_pd(a,b)
-    #elif defined( NO__SSE2__ )
+    #elif defined( __SSE2__ )
         #define VECTORIZE
         #define VEC_SIZE 4
         #define VEC_FLOAT __m128