diff --git a/examples/EAGLE_25/eagle_25.yml b/examples/EAGLE_25/eagle_25.yml
index 5dee9dad0b5d7f694c61fa4c983ead0f1cd6e5e2..f575254301fdd315e7576d7ecbbfbf8f94d7f0c3 100644
--- a/examples/EAGLE_25/eagle_25.yml
+++ b/examples/EAGLE_25/eagle_25.yml
@@ -13,6 +13,9 @@ TimeIntegration:
   dt_min:     1e-10 # The minimal time-step size of the simulation (in internal units).
   dt_max:     1e-4  # The maximal time-step size of the simulation (in internal units).
 
+Scheduler:
+  cell_split_size: 64
+
 # Parameters governing the snapshots
 Snapshots:
   basename:            eagle # Common part of the name of output files
diff --git a/src/align.h b/src/align.h
index 915af33e6e2ba59be1a0849c4de0e2f1bd5b0d96..e1ad706d598eb69fd191ffbccd7be356411ce062 100644
--- a/src/align.h
+++ b/src/align.h
@@ -23,9 +23,52 @@
  * @brief The default struct alignment in SWIFT.
  */
 #define SWIFT_STRUCT_ALIGNMENT 32
+
 /**
  * @brief Defines alignment of structures
  */
 #define SWIFT_STRUCT_ALIGN __attribute__((aligned(SWIFT_STRUCT_ALIGNMENT)))
 
+/**
+ * @brief The default cache alignment in SWIFT.
+ */
+#define SWIFT_CACHE_ALIGNMENT 64
+
+/**
+ * @brief Defines alignment of caches
+ */
+#define SWIFT_CACHE_ALIGN __attribute__((aligned(SWIFT_CACHE_ALIGNMENT)))
+
+/**
+ * @brief Macro to tell the compiler that a given array has the specified alignment.
+ *
+ * Note that this turns into a no-op but gives information to the compiler.
+ *
+ * @param array The array.
+ * @param alignment The alignment in bytes of the array.
+ */
+#if defined(__ICC)
+#define swift_align_information(array, alignment)        \
+  __assume_aligned(array, alignment);
+#elif defined(__GNUC__)
+#define swift_align_information(array, alignment)        \
+  array = __builtin_assume_aligned(array, alignment);
+#endif
+
+/**
+ * @brief Macro to tell the compiler that a given number is 0 modulo a given size.
+ *
+ * Note that this turns into a no-op but gives information to the compiler.
+ * GCC does not have the equivalent built-in so defaults to nothing.
+ *
+ * @param var The variable
+ * @param size The modulo of interest.
+ */
+#if defined(__ICC)
+#define swift_assume_size(var, size)   \
+  __assume(var % size == 0);
+#else
+#define swift_assume_size(var, size)   ;
+#endif
+
 #endif /* SWIFT_ALIGN_H */
diff --git a/src/engine.c b/src/engine.c
index 2adddee54efcb21fc4b9b5db116149e519e63b8f..c158e36a81f630c5c1244409f260ce9a68307e3d 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -4543,6 +4543,10 @@ void engine_init(struct engine *e, struct space *s,
     e->runners[k].cj_cache.count = 0;
     cache_init(&e->runners[k].ci_cache, CACHE_SIZE);
     cache_init(&e->runners[k].cj_cache, CACHE_SIZE);
+    e->runners[k].ci_gravity_cache.count = 0;
+    e->runners[k].cj_gravity_cache.count = 0;
+    gravity_cache_init(&e->runners[k].ci_gravity_cache, space_splitsize);
+    gravity_cache_init(&e->runners[k].cj_gravity_cache, space_splitsize);
 #endif
 
     if (verbose) {
@@ -4629,8 +4633,12 @@ void engine_compute_next_snapshot_time(struct engine *e) {
 void engine_clean(struct engine *e) {
 
 #ifdef WITH_VECTORIZATION
-  for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].ci_cache);
-  for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].cj_cache);
+  for (int i = 0; i < e->nr_threads; ++i) {
+    cache_clean(&e->runners[i].ci_cache);
+    cache_clean(&e->runners[i].cj_cache);
+    gravity_cache_clean(&e->runners[i].ci_gravity_cache);
+    gravity_cache_clean(&e->runners[i].cj_gravity_cache);
+  }
 #endif
   free(e->runners);
   free(e->snapshotUnits);
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index d4a95540de17631ad445075d672d03a1236e34e3..4d0c3f4c4d58bc2183f6244428bfaa5fb3ea2c2c 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -249,4 +249,5 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_nonsym(
   gpi->a_grav[2] -= fdx[2];
 }
 
+
 #endif /* SWIFT_DEFAULT_GRAVITY_IACT_H */
diff --git a/src/runner.h b/src/runner.h
index facadf1608fb7e06af952eedbf1151fa68530bef..aae7f91d8e4bacd0e97a5c70b1c297a156166c03 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -27,6 +27,7 @@
 #include "../config.h"
 
 /* Includes. */
+#include "gravity_cache.h"
 #include "cache.h"
 
 struct cell;
@@ -50,6 +51,12 @@ struct runner {
   struct engine *e;
 
 #ifdef WITH_VECTORIZATION
+  /*! The particle gravity_cache of cell ci. */
+  struct gravity_cache ci_gravity_cache;
+
+  /*! The particle gravity_cache of cell cj. */
+  struct gravity_cache cj_gravity_cache;
+
   /*! The particle cache of cell ci. */
   struct cache ci_cache;
 
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 0724f24c2745ab015bd79fdc0a3d7830bd8d76e0..c5795d5a8e5dea42ebfbae3bf91d2e8ec6768f4a 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -162,6 +162,224 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci,
 void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
                                 struct cell *cj, double shift[3]) {
 
+  /* Some constants */
+  const struct engine *const e = r->e;
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+
+  /* Cell properties */
+  const int gcount_i = ci->gcount;
+  const int gcount_j = cj->gcount;
+  struct gpart *restrict gparts_i = ci->gparts;
+  struct gpart *restrict gparts_j = cj->gparts;
+  const int ci_active = cell_is_active(ci, e);
+  const int cj_active = cell_is_active(cj, e);
+  
+  /* Anything to do here ?*/
+  if (!ci_active && !cj_active) return;
+
+  /* Check that we fit in cache */
+  if(gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", 
+	  gcount_i, gcount_j);
+
+  /* Computed the padded counts */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+
+  /* Fill the caches */
+  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, ci_active, shift);
+  gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j, gcount_padded_j, cj_active);
+
+  /* Ok... Here we go ! */
+
+  if(ci_active) {
+
+    /* Loop over all particles in ci... */
+    for (int pid = 0; pid < gcount_i; pid++) {
+
+      const float x_i = ci_cache->x[pid];
+      const float y_i = ci_cache->y[pid];
+      const float z_i = ci_cache->z[pid];
+      
+      /* Some powers of the softening length */
+      const float h_i = ci_cache->epsilon[pid];
+      const float h2_i = h_i * h_i;
+      const float h_inv_i = 1.f / h_i;
+      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
+
+      /* Local accumulators for the acceleration */
+      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
+
+      /* Make the compiler understand we are in happy vectorization land */
+      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+      swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+      /* Loop over every particle in the other cell. */
+      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+	/* Get info about j */
+	const float x_j = cj_cache->x[pjd];
+	const float y_j = cj_cache->y[pjd];
+	const float z_j = cj_cache->z[pjd];
+	const float mass_j = cj_cache->m[pjd];
+
+        /* Compute the pairwise (square) distance. */
+	const float dx = x_i - x_j;
+	const float dy = y_i - y_j;
+	const float dz = z_i - z_j;
+	const float r2 = dx*dx + dy*dy + dz*dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+	if (r2 == 0.f) error("Interacting particles with 0 distance");
+
+        /* Check that particles have been drifted to the current time */
+        if (gparts_i[pid].ti_drift != e->ti_current)
+          error("gpi not drifted to current time");
+        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+	/* Get the inverse distance */
+	const float r_inv = 1.f / sqrtf(r2);
+	
+	float f_ij, W_ij;
+
+	if (r2 >= h2_i) {
+	  
+	  /* Get Newtonian gravity */
+	  f_ij = mass_j * r_inv * r_inv * r_inv;
+	  
+	} else {
+	  
+	  const float r = r2 * r_inv;	  
+	  const float ui = r * h_inv_i;
+	    
+	  kernel_grav_eval(ui, &W_ij);
+	    
+	  /* Get softened gravity */
+	  f_ij = mass_j * h_inv3_i * W_ij;
+	}
+
+	/* Store it back */
+	a_x -= f_ij * dx;
+	a_y -= f_ij * dy;
+	a_z -= f_ij * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        gparts_i[pid].num_interacted++;
+#endif
+
+      }
+
+      /* Store everything back in cache */
+      ci_cache->a_x[pid] += a_x;
+      ci_cache->a_y[pid] += a_y;
+      ci_cache->a_z[pid] += a_z;
+    }
+  }
+
+  /* Now do the opposite loop */
+  if(cj_active) {
+
+    /* Loop over all particles in ci... */
+    for (int pjd = 0; pjd < gcount_j; pjd++) {
+
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      
+      /* Some powers of the softening length */
+      const float h_j = cj_cache->epsilon[pjd];
+      const float h2_j = h_j * h_j;
+      const float h_inv_j = 1.f / h_j;
+      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;
+
+      /* Local accumulators for the acceleration */
+      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
+
+      /* Make the compiler understand we are in happy vectorization land */
+      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+      swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+      /* Loop over every particle in the other cell. */
+      for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+	/* Get info about j */
+	const float x_i = ci_cache->x[pid];
+	const float y_i = ci_cache->y[pid];
+	const float z_i = ci_cache->z[pid];
+	const float mass_i = ci_cache->m[pid];
+
+        /* Compute the pairwise (square) distance. */
+	const float dx = x_j - x_i;
+	const float dy = y_j - y_i;
+	const float dz = z_j - z_i;
+	const float r2 = dx*dx + dy*dy + dz*dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+	if (r2 == 0.f) error("Interacting particles with 0 distance");
+
+        /* Check that particles have been drifted to the current time */
+        if (gparts_j[pjd].ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
+          error("gpi not drifted to current time");
+#endif
+
+	/* Get the inverse distance */
+	const float r_inv = 1.f / sqrtf(r2);
+	
+	float f_ji, W_ji;
+
+	if (r2 >= h2_j) {
+	  
+	  /* Get Newtonian gravity */
+	  f_ji = mass_i * r_inv * r_inv * r_inv;
+	  
+	} else {
+	  
+	  const float r = r2 * r_inv;	  
+	  const float uj = r * h_inv_j;
+	    
+	  kernel_grav_eval(uj, &W_ji);
+	    
+	  /* Get softened gravity */
+	  f_ji = mass_i * h_inv3_j * W_ji;
+	}
+
+	/* Store it back */
+	a_x -= f_ji * dx;
+	a_y -= f_ji * dy;
+	a_z -= f_ji * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        gparts_j[pjd].num_interacted++;
+#endif
+
+      }
+
+      /* Store everything back in cache */
+      cj_cache->a_x[pjd] += a_x;
+      cj_cache->a_y[pjd] += a_y;
+      cj_cache->a_z[pjd] += a_z;
+    }
+  }
+
+  /* Write back to the particles */
+  if(ci_active)
+    gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
+  if(cj_active)
+    gravity_cache_write_back(cj_cache, gparts_j, gcount_j);
+  
+#ifdef MATTHIEU_OLD_STUFF
+
   /* Some constants */
   const struct engine *const e = r->e;
 
@@ -258,6 +476,7 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
       }
     }
   }
+#endif
 }
 
 /**
@@ -273,6 +492,241 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci,
 void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
                                      struct cell *cj, double shift[3]) {
 
+
+  /* Some constants */
+  const struct engine *const e = r->e;
+  const struct space *s = e->s;
+  const double cell_width = s->width[0];
+  const double a_smooth = e->gravity_properties->a_smooth;
+  const double rlr = cell_width * a_smooth;
+  const float rlr_inv = 1. / rlr;
+
+  /* Caches to play with */
+  struct gravity_cache *const ci_cache = &r->ci_gravity_cache;
+  struct gravity_cache *const cj_cache = &r->cj_gravity_cache;
+
+  /* Cell properties */
+  const int gcount_i = ci->gcount;
+  const int gcount_j = cj->gcount;
+  struct gpart *restrict gparts_i = ci->gparts;
+  struct gpart *restrict gparts_j = cj->gparts;
+  const int ci_active = cell_is_active(ci, e);
+  const int cj_active = cell_is_active(cj, e);
+  
+  /* Anything to do here ?*/
+  if (!ci_active && !cj_active) return;
+
+  /* Check that we fit in cache */
+  if(gcount_i > ci_cache->count || gcount_j > cj_cache->count)
+    error("Not enough space in the caches! gcount_i=%d gcount_j=%d", 
+	  gcount_i, gcount_j);
+
+  /* Computed the padded counts */
+  const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE;
+  const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE;
+
+  /* Fill the caches */
+  gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, ci_active, shift);
+  gravity_cache_populate_no_shift(cj_cache, gparts_j, gcount_j, gcount_padded_j, cj_active);
+
+  /* Ok... Here we go ! */
+
+  if(ci_active) {
+
+    /* Loop over all particles in ci... */
+    for (int pid = 0; pid < gcount_i; pid++) {
+
+      const float x_i = ci_cache->x[pid];
+      const float y_i = ci_cache->y[pid];
+      const float z_i = ci_cache->z[pid];
+      
+      /* Some powers of the softening length */
+      const float h_i = ci_cache->epsilon[pid];
+      const float h2_i = h_i * h_i;
+      const float h_inv_i = 1.f / h_i;
+      const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i;
+
+      /* Local accumulators for the acceleration */
+      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
+
+      /* Make the compiler understand we are in happy vectorization land */
+      swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT);
+      swift_assume_size(gcount_padded_j, VEC_SIZE);
+
+      /* Loop over every particle in the other cell. */
+      for (int pjd = 0; pjd < gcount_padded_j; pjd++) {
+
+	/* Get info about j */
+	const float x_j = cj_cache->x[pjd];
+	const float y_j = cj_cache->y[pjd];
+	const float z_j = cj_cache->z[pjd];
+	const float mass_j = cj_cache->m[pjd];
+
+        /* Compute the pairwise (square) distance. */
+	const float dx = x_i - x_j;
+	const float dy = y_i - y_j;
+	const float dz = z_i - z_j;
+	const float r2 = dx*dx + dy*dy + dz*dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+	if (r2 == 0.f) error("Interacting particles with 0 distance");
+
+        /* Check that particles have been drifted to the current time */
+        if (gparts_i[pid].ti_drift != e->ti_current)
+          error("gpi not drifted to current time");
+        if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+#endif
+
+	/* Get the inverse distance */
+	const float r_inv = 1.f / sqrtf(r2);	
+	const float r = r2 * r_inv;	  
+
+	float f_ij, W_ij, corr_lr;
+
+	if (r2 >= h2_i) {
+	  
+	  /* Get Newtonian gravity */
+	  f_ij = mass_j * r_inv * r_inv * r_inv;
+	  
+	} else {
+	  
+	  const float ui = r * h_inv_i;
+	    
+	  kernel_grav_eval(ui, &W_ij);
+	    
+	  /* Get softened gravity */
+	  f_ij = mass_j * h_inv3_i * W_ij;
+	}
+
+	/* Get long-range correction */
+	const float u_lr = r * rlr_inv;
+	kernel_long_grav_eval(u_lr, &corr_lr);
+	f_ij *= corr_lr;
+
+	/* Store it back */
+	a_x -= f_ij * dx;
+	a_y -= f_ij * dy;
+	a_z -= f_ij * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        gparts_i[pid].num_interacted++;
+#endif
+
+      }
+
+      /* Store everything back in cache */
+      ci_cache->a_x[pid] += a_x;
+      ci_cache->a_y[pid] += a_y;
+      ci_cache->a_z[pid] += a_z;
+    }
+  }
+
+  /* Now do the opposite loop */
+  if(cj_active) {
+
+    /* Loop over all particles in ci... */
+    for (int pjd = 0; pjd < gcount_j; pjd++) {
+
+      const float x_j = cj_cache->x[pjd];
+      const float y_j = cj_cache->y[pjd];
+      const float z_j = cj_cache->z[pjd];
+      
+      /* Some powers of the softening length */
+      const float h_j = cj_cache->epsilon[pjd];
+      const float h2_j = h_j * h_j;
+      const float h_inv_j = 1.f / h_j;
+      const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j;
+
+      /* Local accumulators for the acceleration */
+      float a_x = 0.f, a_y = 0.f, a_z = 0.f;
+
+      /* Make the compiler understand we are in happy vectorization land */
+      swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT);
+      swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT);
+      swift_assume_size(gcount_padded_i, VEC_SIZE);
+
+      /* Loop over every particle in the other cell. */
+      for (int pid = 0; pid < gcount_padded_i; pid++) {
+
+	/* Get info about j */
+	const float x_i = ci_cache->x[pid];
+	const float y_i = ci_cache->y[pid];
+	const float z_i = ci_cache->z[pid];
+	const float mass_i = ci_cache->m[pid];
+
+        /* Compute the pairwise (square) distance. */
+	const float dx = x_j - x_i;
+	const float dy = y_j - y_i;
+	const float dz = z_j - z_i;
+	const float r2 = dx*dx + dy*dy + dz*dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+	if (r2 == 0.f) error("Interacting particles with 0 distance");
+
+        /* Check that particles have been drifted to the current time */
+        if (gparts_j[pjd].ti_drift != e->ti_current)
+          error("gpj not drifted to current time");
+        if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current)
+          error("gpi not drifted to current time");
+#endif
+
+	/* Get the inverse distance */
+	const float r_inv = 1.f / sqrtf(r2);
+	const float r = r2 * r_inv;	  
+	
+	float f_ji, W_ji, corr_lr;
+
+	if (r2 >= h2_j) {
+	  
+	  /* Get Newtonian gravity */
+	  f_ji = mass_i * r_inv * r_inv * r_inv;
+	  
+	} else {
+	  
+	  const float uj = r * h_inv_j;
+	    
+	  kernel_grav_eval(uj, &W_ji);
+	    
+	  /* Get softened gravity */
+	  f_ji = mass_i * h_inv3_j * W_ji;
+	}
+
+	/* Get long-range correction */
+	const float u_lr = r * rlr_inv;
+	kernel_long_grav_eval(u_lr, &corr_lr);
+	f_ji *= corr_lr;
+
+	/* Store it back */
+	a_x -= f_ji * dx;
+	a_y -= f_ji * dy;
+	a_z -= f_ji * dz;
+
+#ifdef SWIFT_DEBUG_CHECKS
+        gparts_j[pjd].num_interacted++;
+#endif
+
+      }
+
+      /* Store everything back in cache */
+      cj_cache->a_x[pjd] += a_x;
+      cj_cache->a_y[pjd] += a_y;
+      cj_cache->a_z[pjd] += a_z;
+    }
+  }
+
+  /* Write back to the particles */
+  if(ci_active)
+    gravity_cache_write_back(ci_cache, gparts_i, gcount_i);
+  if(cj_active)
+    gravity_cache_write_back(cj_cache, gparts_j, gcount_j);
+
+#ifdef MATTHIEU_OLD_STUFF
   /* Some constants */
   const struct engine *const e = r->e;
   const struct space *s = e->s;
@@ -374,6 +828,8 @@ void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci,
       }
     }
   }
+
+#endif
 }
 
 /**
diff --git a/src/vector.h b/src/vector.h
index 7d82b9f5c9e2bc6e1fea9426ab3870eeec180408..6d5389039c028e4307d4e559dd6567cce6c25392 100644
--- a/src/vector.h
+++ b/src/vector.h
@@ -23,8 +23,12 @@
 /* Have I already read this file? */
 #ifndef VEC_MACRO
 
+/* Config parameters. */
 #include "../config.h"
 
+/* Local headers */
+#include "inline.h"
+
 #ifdef WITH_VECTORIZATION
 
 /* Need to check whether compiler supports this (IBM does not)