diff --git a/AUTHORS b/AUTHORS
index 32597ffd146795656286b9528656ab013c519c30..4d43745609fc9d0ac2f149256a76ed6c581c9144 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -3,3 +3,8 @@ Matthieu Schaller       matthieu.schaller@durham.ac.uk
 Aidan Chalk             aidan.chalk@durham.ac.uk
 Peter W. Draper         p.w.draper@durham.ac.uk
 Bert Vandenbrouck       bert.vandenbroucke@gmail.com
+James S. Willis 	james.s.willis@durham.ac.uk
+John A. Regan 		john.a.regan@durham.ac.uk
+Angus Lepper		angus.lepper@ed.ac.uk
+Tom Theuns 		tom.theuns@durham.ac.uk
+Richard G. Bower	r.g.bower@durham.ac.uk
diff --git a/INSTALL.swift b/INSTALL.swift
index d3993a70814d851a10bd4a9c80fafd58130d039b..c18142c1a62c7c8a011ead2ee8d0d869c9e072ec 100644
--- a/INSTALL.swift
+++ b/INSTALL.swift
@@ -75,26 +75,33 @@ SWIFT depends on a number of third party libraries that should be available
 before you can build it.
 
 
-HDF5: a HDF5 library is required to read and write particle data. One of the
-commands "h5cc" or "h5pcc" should be available. If "h5pcc" is located them a
-parallel HDF5 built for the version of MPI located should be provided. If
-the command is not available then it can be located using the "--with-hfd5"
-configure option. The value should be the full path to the "h5cc" or "h5pcc"
-commands.
+HDF5: a HDF5 library (v. 1.8.x or higher) is required to read and write
+particle data. One of the commands "h5cc" or "h5pcc" should be
+available. If "h5pcc" is located them a parallel HDF5 built for the version
+of MPI located should be provided. If the command is not available then it
+can be located using the "--with-hfd5" configure option. The value should
+be the full path to the "h5cc" or "h5pcc" commands.
 
 
-MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.  
+MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE.
 Before running configure the "mpirun" command should be available in the
 shell. If your command isn't called "mpirun" then define the "MPIRUN"
 environment variable, either in the shell or when running configure.
 
 
 METIS: a build of the METIS library can be optionally used to optimize the
-load between MPI nodes (requires an MPI library). This should be found in the
-standard installation directories, or pointed at using the "--with-metis"
-configuration option.  In this case the top-level installation directory of
-the METIS build should be given. Note to use METIS you should at least supply
-"--with-metis".
+load between MPI nodes (requires an MPI library). This should be found in
+the standard installation directories, or pointed at using the
+"--with-metis" configuration option.  In this case the top-level
+installation directory of the METIS build should be given. Note to use
+METIS you should at least supply "--with-metis".
 
 
-DOXYGEN: the doxygen library is required to create the SWIFT API documentation.
+libNUMA: a build of the NUMA library can be used to pin the threads to
+the physical core of the machine SWIFT is running on. This is not always
+necessary as the OS scheduler may do a good job at distributing the threads
+among the different cores on each computing node.
+
+
+DOXYGEN: the doxygen library is required to create the SWIFT API
+documentation.
diff --git a/configure.ac b/configure.ac
index 6caf61d3af8d1db707fe70ad27eae86beec647e2..44d21dc30d408260a11a7b7b100df455b98295d5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -16,7 +16,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 # Init the project.
-AC_INIT([SWIFT],[0.1.0])
+AC_INIT([SWIFT],[0.2.0])
 AC_CONFIG_SRCDIR([src/space.c])
 AC_CONFIG_AUX_DIR([.])
 AM_INIT_AUTOMAKE
@@ -179,10 +179,14 @@ if test "$enable_opt" = "yes" ; then
    ac_test_CFLAGS="yes"
    CFLAGS="$old_CFLAGS $CFLAGS"
 
-   # Check SSE & AVX support (some overlap with AX_CC_MAXOPT).
+   # Check SSE & AVX support (some overlap with AX_CC_MAXOPT). 
+   # Don't use the SIMD_FLAGS result with Intel compilers. The -x<code>
+   # value from AX_CC_MAXOPT should be sufficient.
    AX_EXT
    if test "$SIMD_FLAGS" != ""; then
-       CFLAGS="$CFLAGS $SIMD_FLAGS"
+       if test "$ax_cv_c_compiler_vendor" != "intel"; then
+           CFLAGS="$CFLAGS $SIMD_FLAGS"
+       fi
    fi
 fi
 
@@ -302,11 +306,13 @@ AC_CHECK_FUNC(pthread_setaffinity_np, AC_DEFINE([HAVE_SETAFFINITY],[true],
 AM_CONDITIONAL(HAVESETAFFINITY,
     [test "$ac_cv_func_pthread_setaffinity_np" = "yes"])
 
+have_numa="no"
 if test "$ac_cv_func_pthread_setaffinity_np" = "yes"; then
   # Check for libnuma.
   AC_CHECK_HEADER([numa.h])
   if test "$ac_cv_header_numa_h" = "yes"; then
     AC_CHECK_LIB([numa], [numa_available])
+    have_numa="yes"
   fi
 fi
 
@@ -367,14 +373,15 @@ AC_CONFIG_FILES([Makefile src/Makefile examples/Makefile doc/Makefile doc/Doxyfi
 
 # Report general configuration.
 AC_MSG_RESULT([
-   Compiler: $CC
-     vendor: $ax_cv_c_compiler_vendor
-    version: $ax_cv_c_compiler_version
-      flags: $CFLAGS
-   MPI enabled: $enable_mpi
-   HDF5 enabled: $with_hdf5
-       parallel: $have_parallel_hdf5
-   Metis enabled: $with_metis
+   Compiler        : $CC
+    - vendor       : $ax_cv_c_compiler_vendor
+    - version      : $ax_cv_c_compiler_version
+    - flags        : $CFLAGS
+   MPI enabled     : $enable_mpi
+   HDF5 enabled    : $with_hdf5
+    - parallel     : $have_parallel_hdf5
+   Metis enabled   : $with_metis
+   libNUMA enabled : $have_numa
 ])
 
 # Generate output.
diff --git a/examples/UniformBox/makeICbig.py b/examples/UniformBox/makeICbig.py
new file mode 100644
index 0000000000000000000000000000000000000000..28b4667cccf381532900c61228c24b3b6b1d62c1
--- /dev/null
+++ b/examples/UniformBox/makeICbig.py
@@ -0,0 +1,150 @@
+###############################################################################
+ # This file is part of SWIFT.
+ # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ # 
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU Lesser General Public License as published
+ # by the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ # 
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ # GNU General Public License for more details.
+ # 
+ # You should have received a copy of the GNU Lesser General Public License
+ # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ # 
+ ##############################################################################
+
+import h5py
+import sys
+from numpy import *
+
+# Generates a swift IC file containing a cartesian distribution of particles
+# at a constant density and pressure in a cubic box
+
+# Parameters
+periodic= 1           # 1 For periodic box
+boxSize = 1.
+L = int(sys.argv[1])  # Number of particles along one axis
+N = int(sys.argv[2])  # Write N particles at a time to avoid requiring a lot of RAM
+rho = 2.              # Density
+P = 1.                # Pressure
+gamma = 5./3.         # Gas adiabatic index
+fileName = "uniformBox_%d.hdf5"%L
+
+#---------------------------------------------------
+numPart = L**3
+mass = boxSize**3 * rho / numPart
+internalEnergy = P / ((gamma - 1.)*rho)
+
+#---------------------------------------------------
+
+n_iterations = numPart / N
+remainder = numPart % N
+
+print "Writing", numPart, "in", n_iterations, "iterations of size", N, "and a remainder of", remainder
+
+#---------------------------------------------------
+
+#File
+file = h5py.File(fileName, 'w')
+
+# Header
+grp = file.create_group("/Header")
+grp.attrs["BoxSize"] = boxSize
+grp.attrs["NumPart_Total"] =  [numPart % (long(1)<<32), 0, 0, 0, 0, 0]
+grp.attrs["NumPart_Total_HighWord"] = [numPart / (long(1)<<32), 0, 0, 0, 0, 0]
+grp.attrs["NumPart_ThisFile"] = [numPart, 0, 0, 0, 0, 0]
+grp.attrs["Time"] = 0.0
+grp.attrs["NumFilesPerSnapshot"] = 1
+grp.attrs["MassTable"] = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
+grp.attrs["Flag_Entropy_ICs"] = 0
+
+
+#Runtime parameters
+grp = file.create_group("/RuntimePars")
+grp.attrs["PeriodicBoundariesOn"] = periodic
+
+#Particle group
+grp = file.create_group("/PartType0")
+
+# First create the arrays in the file
+ds_v = grp.create_dataset('Velocities', (numPart, 3), 'f', chunks=True, compression="gzip")
+ds_m = grp.create_dataset('Masses', (numPart,1), 'f', chunks=True, compression="gzip")
+ds_h = grp.create_dataset('SmoothingLength', (numPart,1), 'f', chunks=True, compression="gzip")
+ds_u = grp.create_dataset('InternalEnergy', (numPart,1), 'f', chunks=True, compression="gzip")
+ds_id = grp.create_dataset('ParticleIDs', (numPart, 1), 'L', chunks=True, compression="gzip")
+ds_x = grp.create_dataset('Coordinates', (numPart, 3), 'd', chunks=True, compression="gzip")
+
+# Now loop and create parts of the dataset
+offset = 0
+for n in range(n_iterations):
+    v  = zeros((N, 3))
+    ds_v[offset:offset+N,:] = v
+    v = zeros(1)
+
+    m = full((N, 1), mass)
+    ds_m[offset:offset+N] = m
+    m = zeros(1)
+
+    h = full((N, 1), 1.1255 * boxSize / L)
+    ds_h[offset:offset+N] = h
+    h = zeros(1)
+
+    u = full((N, 1), internalEnergy)
+    ds_u[offset:offset+N] = u
+    u = zeros(1)
+
+    ids = linspace(offset, offset+N, N, endpoint=False).reshape((N,1))
+    ds_id[offset:offset+N] = ids
+    x      = ids % L;
+    y      = ((ids - x) / L) % L;
+    z      = (ids - x - L * y) / L**2;
+    ids    = zeros(1)
+    coords = zeros((N, 3))
+    coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
+    coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
+    coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
+    ds_x[offset:offset+N,:] = coords
+    coords  = zeros((1,3))
+
+    offset += N
+    print "Done", offset,"/", numPart, "(%.1f %%)"%(100*(float)(offset)/numPart)
+
+# And now, the remainder
+v  = zeros((remainder, 3))
+ds_v[offset:offset+remainder,:] = v
+v = zeros(1)
+
+m = full((remainder, 1), mass)
+ds_m[offset:offset+remainder] = m
+m = zeros(1)
+
+h = full((remainder, 1), 1.1255 * boxSize / L)
+ds_h[offset:offset+remainder] = h
+h = zeros(1)
+
+u = full((remainder, 1), internalEnergy)
+ds_u[offset:offset+remainder] = u
+u = zeros(1)
+
+print "Done", offset+remainder,"/", numPart
+
+ids = linspace(offset, offset+remainder, remainder, endpoint=False).reshape((remainder,1))
+ds_id[offset:offset+remainder] = ids
+x      = ids % L;
+y      = ((ids - x) / L) % L;
+z      = (ids - x - L * y) / L**2;
+coords = zeros((remainder, 3))
+coords[:,0] = z[:,0] * boxSize / L + boxSize / (2*L)
+coords[:,1] = y[:,0] * boxSize / L + boxSize / (2*L)
+coords[:,2] = x[:,0] * boxSize / L + boxSize / (2*L)
+ds_x[offset:offset+remainder,:] = coords
+
+
+
+
+
+file.close()
diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4
index 0d442612d161f18091aba6200ee965b85a797110..d2a0937232e0e8bf9c3e7e79c20267bfa1d75880 100644
--- a/m4/ax_cc_maxopt.m4
+++ b/m4/ax_cc_maxopt.m4
@@ -125,7 +125,7 @@ if test "$ac_test_CFLAGS" != "set"; then
 		    *1?6[[aef]]?:*:*:*|*2?6[[5cef]]?:*:*:*) icc_flags="-xSSE4.2 -xS -xT -xB -xK" ;;
 		    *2?6[[ad]]?:*:*:*) icc_flags="-xAVX -SSE4.2 -xS -xT -xB -xK" ;;
 		    *3?6[[ae]]?:*:*:*) icc_flags="-xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;
-		    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;
+		    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*|*4?6[[ef]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;
 		    *000?f[[346]]?:*:*:*|?f[[346]]?:*:*:*|f[[346]]?:*:*:*) icc_flags="-xSSE3 -xP -xO -xN -xW -xK" ;;
 		    *00??f??:*:*:*|??f??:*:*:*|?f??:*:*:*|f??:*:*:*) icc_flags="-xSSE2 -xN -xW -xK" ;;
                   esac ;;
diff --git a/src/Makefile.am b/src/Makefile.am
index 19af973ef6200e53220447e09236c02535020d73..8cb27ef60ffaf884b1c0d7626b5d17d6a7a2a7fc 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -46,15 +46,20 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
 		 runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h \
-		 hydro.h hydro_io.h gravity.h \
+		 gravity.h \
 		 gravity/Default/gravity.h gravity/Default/runner_iact_grav.h \
 		 gravity/Default/gravity_part.h \
+	 	 hydro.h hydro_io.h \
 		 hydro/Minimal/hydro.h hydro/Minimal/hydro_iact.h hydro/Minimal/hydro_io.h \
                  hydro/Minimal/hydro_debug.h hydro/Minimal/hydro_part.h \
 		 hydro/Default/hydro.h hydro/Default/hydro_iact.h hydro/Default/hydro_io.h \
                  hydro/Default/hydro_debug.h hydro/Default/hydro_part.h \
 		 hydro/Gadget2/hydro.h hydro/Gadget2/hydro_iact.h hydro/Gadget2/hydro_io.h \
-                 hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h
+                 hydro/Gadget2/hydro_debug.h hydro/Gadget2/hydro_part.h \
+		 hydro/Gizmo/hydro.h hydro/Gizmo/hydro_iact.h hydro/Gizmo/hydro_io.h \
+                 hydro/Gizmo/hydro_debug.h hydro/Gizmo/hydro_part.h \
+	         riemann.h \
+		 riemann/riemann_hllc.h riemann/riemann_trrs.h riemann/riemann_exact.h
 
 # Sources and flags for regular library
 libswiftsim_la_SOURCES = $(AM_SOURCES)
diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h
index f92531bcce30d6c1461bb1366f88bc282a2747e7..d31b6be383b80a2698b63d27308f6fee9b23518f 100644
--- a/src/hydro/Gadget2/hydro_iact.h
+++ b/src/hydro/Gadget2/hydro_iact.h
@@ -51,7 +51,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_density(
   float dv[3], curlvr[3];
 
   /* Get the masses. */
-  const float mi = pj->mass;
+  const float mi = pi->mass;
   const float mj = pj->mass;
 
   /* Get r and r inverse. */
diff --git a/src/hydro/Gizmo/hydro.h b/src/hydro/Gizmo/hydro.h
new file mode 100644
index 0000000000000000000000000000000000000000..f3553a009c22a2f6353796e8a278f0db7d66d294
--- /dev/null
+++ b/src/hydro/Gizmo/hydro.h
@@ -0,0 +1,621 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @brief Computes the hydro time-step of a given particle
+ *
+ * @param p Pointer to the particle data
+ * @param xp Pointer to the extended particle data
+ *
+ */
+__attribute__((always_inline)) INLINE static float hydro_compute_timestep(
+    struct part* p, struct xpart* xp) {
+
+  return const_cfl * p->h / fabs(p->timestepvars.vmax);
+}
+
+/**
+ * @brief Initialises the particles for the first time
+ *
+ * This function is called only once just after the ICs have been
+ * read in to do some conversions.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_first_init_part(struct part* p, struct xpart* xp) {
+}
+
+/**
+ * @brief Prepares a particle for the volume calculation.
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_init_part(struct part* p) {
+
+#ifdef SPH_GRADIENTS
+  /* use the old volumes to estimate new primitive variables to be used for the
+     gradient calculation */
+  if (p->conserved.mass) {
+    p->primitives.rho = p->conserved.mass / p->geometry.volume;
+    p->primitives.v[0] = p->conserved.momentum[0] / p->conserved.mass;
+    p->primitives.v[1] = p->conserved.momentum[1] / p->conserved.mass;
+    p->primitives.v[2] = p->conserved.momentum[2] / p->conserved.mass;
+    p->primitives.P =
+        (const_hydro_gamma - 1.) *
+        (p->conserved.energy -
+         0.5 * (p->conserved.momentum[0] * p->conserved.momentum[0] +
+                p->conserved.momentum[1] * p->conserved.momentum[1] +
+                p->conserved.momentum[2] * p->conserved.momentum[2]) /
+             p->conserved.mass) /
+        p->geometry.volume;
+  }
+#endif
+
+  p->density.wcount = 0.0f;
+  p->density.wcount_dh = 0.0f;
+  p->geometry.volume = 0.0f;
+  p->geometry.matrix_E[0][0] = 0.0f;
+  p->geometry.matrix_E[0][1] = 0.0f;
+  p->geometry.matrix_E[0][2] = 0.0f;
+  p->geometry.matrix_E[1][0] = 0.0f;
+  p->geometry.matrix_E[1][1] = 0.0f;
+  p->geometry.matrix_E[1][2] = 0.0f;
+  p->geometry.matrix_E[2][0] = 0.0f;
+  p->geometry.matrix_E[2][1] = 0.0f;
+  p->geometry.matrix_E[2][2] = 0.0f;
+
+#ifdef SPH_GRADIENTS
+  p->primitives.gradients.rho[0] = 0.0f;
+  p->primitives.gradients.rho[1] = 0.0f;
+  p->primitives.gradients.rho[2] = 0.0f;
+
+  p->primitives.gradients.v[0][0] = 0.0f;
+  p->primitives.gradients.v[0][1] = 0.0f;
+  p->primitives.gradients.v[0][2] = 0.0f;
+
+  p->primitives.gradients.v[1][0] = 0.0f;
+  p->primitives.gradients.v[1][1] = 0.0f;
+  p->primitives.gradients.v[1][2] = 0.0f;
+
+  p->primitives.gradients.v[2][0] = 0.0f;
+  p->primitives.gradients.v[2][1] = 0.0f;
+  p->primitives.gradients.v[2][2] = 0.0f;
+
+  p->primitives.gradients.P[0] = 0.0f;
+  p->primitives.gradients.P[1] = 0.0f;
+  p->primitives.gradients.P[2] = 0.0f;
+
+  p->primitives.limiter.rho[0] = FLT_MAX;
+  p->primitives.limiter.rho[1] = -FLT_MAX;
+  p->primitives.limiter.v[0][0] = FLT_MAX;
+  p->primitives.limiter.v[0][1] = -FLT_MAX;
+  p->primitives.limiter.v[1][0] = FLT_MAX;
+  p->primitives.limiter.v[1][1] = -FLT_MAX;
+  p->primitives.limiter.v[2][0] = FLT_MAX;
+  p->primitives.limiter.v[2][1] = -FLT_MAX;
+  p->primitives.limiter.P[0] = FLT_MAX;
+  p->primitives.limiter.P[1] = -FLT_MAX;
+
+  p->primitives.limiter.maxr = -FLT_MAX;
+#endif
+}
+
+/**
+ * @brief Finishes the density calculation.
+ *
+ * Multiplies the density and number of neighbours by the appropiate constants
+ * and add the self-contribution term.
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_end_volume(struct part* p) {
+
+  /* Some smoothing length multiples. */
+  const float h = p->h;
+  const float ih = 1.0f / h;
+
+  /* Final operation on the density. */
+  p->density.wcount =
+      (p->density.wcount + kernel_root) * (4.0f / 3.0 * M_PI * kernel_gamma3);
+  p->density.wcount_dh =
+      p->density.wcount_dh * ih * (4.0f / 3.0 * M_PI * kernel_gamma3);
+}
+
+/**
+ * @brief Prepare a particle for the force calculation.
+ *
+ * Computes viscosity term, conduction term and smoothing length gradient terms.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline)) INLINE static void hydro_prepare_gradient(
+    struct part* p, struct xpart* xp) {
+
+  /* Some smoothing length multiples. */
+  const float h = p->h;
+  const float ih = 1.0f / h;
+  const float ih2 = ih * ih;
+
+  float detE, volume;
+  float E[3][3];
+  GFLOAT m, momentum[3], energy;
+
+#ifndef THERMAL_ENERGY
+  GFLOAT momentum2;
+#endif
+
+#if defined(SPH_GRADIENTS) && defined(SLOPE_LIMITER)
+  GFLOAT gradrho[3], gradv[3][3], gradP[3];
+  GFLOAT gradtrue, gradmax, gradmin, alpha;
+#endif
+
+  /* Final operation on the geometry. */
+  /* we multiply with the smoothing kernel normalization ih3 and calculate the
+   * volume */
+  volume = ih * ih2 * (p->geometry.volume + kernel_root);
+  p->geometry.volume = volume = 1. / volume;
+  /* we multiply with the smoothing kernel normalization */
+  p->geometry.matrix_E[0][0] = E[0][0] = ih * ih2 * p->geometry.matrix_E[0][0];
+  p->geometry.matrix_E[0][1] = E[0][1] = ih * ih2 * p->geometry.matrix_E[0][1];
+  p->geometry.matrix_E[0][2] = E[0][2] = ih * ih2 * p->geometry.matrix_E[0][2];
+  p->geometry.matrix_E[1][0] = E[1][0] = ih * ih2 * p->geometry.matrix_E[1][0];
+  p->geometry.matrix_E[1][1] = E[1][1] = ih * ih2 * p->geometry.matrix_E[1][1];
+  p->geometry.matrix_E[1][2] = E[1][2] = ih * ih2 * p->geometry.matrix_E[1][2];
+  p->geometry.matrix_E[2][0] = E[2][0] = ih * ih2 * p->geometry.matrix_E[2][0];
+  p->geometry.matrix_E[2][1] = E[2][1] = ih * ih2 * p->geometry.matrix_E[2][1];
+  p->geometry.matrix_E[2][2] = E[2][2] = ih * ih2 * p->geometry.matrix_E[2][2];
+
+  /* invert the E-matrix */
+  /* code shamelessly stolen from the public version of GIZMO */
+  /* But since we should never invert a matrix, this code has to be replaced */
+  detE = E[0][0] * E[1][1] * E[2][2] + E[0][1] * E[1][2] * E[2][0] +
+         E[0][2] * E[1][0] * E[2][1] - E[0][2] * E[1][1] * E[2][0] -
+         E[0][1] * E[1][0] * E[2][2] - E[0][0] * E[1][2] * E[2][1];
+  /* check for zero determinant */
+  if ((detE != 0) && !isnan(detE)) {
+    p->geometry.matrix_E[0][0] = (E[1][1] * E[2][2] - E[1][2] * E[2][1]) / detE;
+    p->geometry.matrix_E[0][1] = (E[0][2] * E[2][1] - E[0][1] * E[2][2]) / detE;
+    p->geometry.matrix_E[0][2] = (E[0][1] * E[1][2] - E[0][2] * E[1][1]) / detE;
+    p->geometry.matrix_E[1][0] = (E[1][2] * E[2][0] - E[1][0] * E[2][2]) / detE;
+    p->geometry.matrix_E[1][1] = (E[0][0] * E[2][2] - E[0][2] * E[2][0]) / detE;
+    p->geometry.matrix_E[1][2] = (E[0][2] * E[1][0] - E[0][0] * E[1][2]) / detE;
+    p->geometry.matrix_E[2][0] = (E[1][0] * E[2][1] - E[1][1] * E[2][0]) / detE;
+    p->geometry.matrix_E[2][1] = (E[0][1] * E[2][0] - E[0][0] * E[2][1]) / detE;
+    p->geometry.matrix_E[2][2] = (E[0][0] * E[1][1] - E[0][1] * E[1][0]) / detE;
+  } else {
+    /* if the E-matrix is not well behaved, we cannot use it */
+    p->geometry.matrix_E[0][0] = 0.0f;
+    p->geometry.matrix_E[0][1] = 0.0f;
+    p->geometry.matrix_E[0][2] = 0.0f;
+    p->geometry.matrix_E[1][0] = 0.0f;
+    p->geometry.matrix_E[1][1] = 0.0f;
+    p->geometry.matrix_E[1][2] = 0.0f;
+    p->geometry.matrix_E[2][0] = 0.0f;
+    p->geometry.matrix_E[2][1] = 0.0f;
+    p->geometry.matrix_E[2][2] = 0.0f;
+  }
+
+#ifdef SPH_GRADIENTS
+  /* finalize gradients by multiplying with volume */
+  p->primitives.gradients.rho[0] *= ih2 * ih2 * volume;
+  p->primitives.gradients.rho[1] *= ih2 * ih2 * volume;
+  p->primitives.gradients.rho[2] *= ih2 * ih2 * volume;
+
+  p->primitives.gradients.v[0][0] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[0][1] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[0][2] *= ih2 * ih2 * volume;
+
+  p->primitives.gradients.v[1][0] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[1][1] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[1][2] *= ih2 * ih2 * volume;
+
+  p->primitives.gradients.v[2][0] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[2][1] *= ih2 * ih2 * volume;
+  p->primitives.gradients.v[2][2] *= ih2 * ih2 * volume;
+
+  p->primitives.gradients.P[0] *= ih2 * ih2 * volume;
+  p->primitives.gradients.P[1] *= ih2 * ih2 * volume;
+  p->primitives.gradients.P[2] *= ih2 * ih2 * volume;
+
+/* slope limiter */
+#ifdef SLOPE_LIMITER
+  gradrho[0] = p->primitives.gradients.rho[0];
+  gradrho[1] = p->primitives.gradients.rho[1];
+  gradrho[2] = p->primitives.gradients.rho[2];
+
+  gradv[0][0] = p->primitives.gradients.v[0][0];
+  gradv[0][1] = p->primitives.gradients.v[0][1];
+  gradv[0][2] = p->primitives.gradients.v[0][2];
+
+  gradv[1][0] = p->primitives.gradients.v[1][0];
+  gradv[1][1] = p->primitives.gradients.v[1][1];
+  gradv[1][2] = p->primitives.gradients.v[1][2];
+
+  gradv[2][0] = p->primitives.gradients.v[2][0];
+  gradv[2][1] = p->primitives.gradients.v[2][1];
+  gradv[2][2] = p->primitives.gradients.v[2][2];
+
+  gradP[0] = p->primitives.gradients.P[0];
+  gradP[1] = p->primitives.gradients.P[1];
+  gradP[2] = p->primitives.gradients.P[2];
+
+  gradtrue = sqrtf(gradrho[0] * gradrho[0] + gradrho[1] * gradrho[1] +
+                   gradrho[2] * gradrho[2]);
+  /* gradtrue might be zero. In this case, there is no gradient and we don't
+     need to slope limit anything... */
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.rho[1] - p->primitives.rho;
+    gradmin = p->primitives.rho - p->primitives.limiter.rho[0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.rho[0] *= alpha;
+    p->primitives.gradients.rho[1] *= alpha;
+    p->primitives.gradients.rho[2] *= alpha;
+  }
+
+  gradtrue = sqrtf(gradv[0][0] * gradv[0][0] + gradv[0][1] * gradv[0][1] +
+                   gradv[0][2] * gradv[0][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[0][1] - p->primitives.v[0];
+    gradmin = p->primitives.v[0] - p->primitives.limiter.v[0][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[0][0] *= alpha;
+    p->primitives.gradients.v[0][1] *= alpha;
+    p->primitives.gradients.v[0][2] *= alpha;
+  }
+
+  gradtrue = sqrtf(gradv[1][0] * gradv[1][0] + gradv[1][1] * gradv[1][1] +
+                   gradv[1][2] * gradv[1][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[1][1] - p->primitives.v[1];
+    gradmin = p->primitives.v[1] - p->primitives.limiter.v[1][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[1][0] *= alpha;
+    p->primitives.gradients.v[1][1] *= alpha;
+    p->primitives.gradients.v[1][2] *= alpha;
+  }
+
+  gradtrue = sqrtf(gradv[2][0] * gradv[2][0] + gradv[2][1] * gradv[2][1] +
+                   gradv[2][2] * gradv[2][2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[2][1] - p->primitives.v[2];
+    gradmin = p->primitives.v[2] - p->primitives.limiter.v[2][0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[2][0] *= alpha;
+    p->primitives.gradients.v[2][1] *= alpha;
+    p->primitives.gradients.v[2][2] *= alpha;
+  }
+
+  gradtrue =
+      sqrtf(gradP[0] * gradP[0] + gradP[1] * gradP[1] + gradP[2] * gradP[2]);
+  if (gradtrue) {
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.P[1] - p->primitives.P;
+    gradmin = p->primitives.P - p->primitives.limiter.P[0];
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.P[0] *= alpha;
+    p->primitives.gradients.P[1] *= alpha;
+    p->primitives.gradients.P[2] *= alpha;
+  }
+#endif  // SLOPE_LIMITER
+#else   // SPH_GRADIENTS
+  p->primitives.gradients.rho[0] = 0.0f;
+  p->primitives.gradients.rho[1] = 0.0f;
+  p->primitives.gradients.rho[2] = 0.0f;
+
+  p->primitives.gradients.v[0][0] = 0.0f;
+  p->primitives.gradients.v[0][1] = 0.0f;
+  p->primitives.gradients.v[0][2] = 0.0f;
+
+  p->primitives.gradients.v[1][0] = 0.0f;
+  p->primitives.gradients.v[1][1] = 0.0f;
+  p->primitives.gradients.v[1][2] = 0.0f;
+
+  p->primitives.gradients.v[2][0] = 0.0f;
+  p->primitives.gradients.v[2][1] = 0.0f;
+  p->primitives.gradients.v[2][2] = 0.0f;
+
+  p->primitives.gradients.P[0] = 0.0f;
+  p->primitives.gradients.P[1] = 0.0f;
+  p->primitives.gradients.P[2] = 0.0f;
+
+  p->primitives.limiter.rho[0] = FLT_MAX;
+  p->primitives.limiter.rho[1] = -FLT_MAX;
+  p->primitives.limiter.v[0][0] = FLT_MAX;
+  p->primitives.limiter.v[0][1] = -FLT_MAX;
+  p->primitives.limiter.v[1][0] = FLT_MAX;
+  p->primitives.limiter.v[1][1] = -FLT_MAX;
+  p->primitives.limiter.v[2][0] = FLT_MAX;
+  p->primitives.limiter.v[2][1] = -FLT_MAX;
+  p->primitives.limiter.P[0] = FLT_MAX;
+  p->primitives.limiter.P[1] = -FLT_MAX;
+
+  p->primitives.limiter.maxr = -FLT_MAX;
+#endif  // SPH_GRADIENTS
+
+  /* compute primitive variables */
+  /* eqns (3)-(5) */
+  m = p->conserved.mass;
+  if (m) {
+    momentum[0] = p->conserved.momentum[0];
+    momentum[1] = p->conserved.momentum[1];
+    momentum[2] = p->conserved.momentum[2];
+#ifndef THERMAL_ENERGY
+    momentum2 = (momentum[0] * momentum[0] + momentum[1] * momentum[1] +
+                 momentum[2] * momentum[2]);
+#endif
+    energy = p->conserved.energy;
+    p->primitives.rho = m / volume;
+    p->primitives.v[0] = momentum[0] / m;
+    p->primitives.v[1] = momentum[1] / m;
+    p->primitives.v[2] = momentum[2] / m;
+#ifndef THERMAL_ENERGY
+    p->primitives.P =
+        (const_hydro_gamma - 1.) * (energy - 0.5 * momentum2 / m) / volume;
+#else
+    p->primitives.P = (const_hydro_gamma - 1.) * energy / volume;
+#endif
+  }
+}
+
+/**
+ * @brief Finishes the gradient calculation.
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_end_gradient(struct part* p) {
+
+#ifndef SPH_GRADIENTS
+  float h, ih, ih2, ih3;
+#ifdef SLOPE_LIMITER
+  GFLOAT gradrho[3], gradv[3][3], gradP[3];
+  GFLOAT gradtrue, gradmax, gradmin, alpha;
+#endif
+
+  /* add kernel normalization to gradients */
+  h = p->h;
+  ih = 1.0f / h;
+  ih2 = ih * ih;
+  ih3 = ih * ih2;
+
+  p->primitives.gradients.rho[0] *= ih3;
+  p->primitives.gradients.rho[1] *= ih3;
+  p->primitives.gradients.rho[2] *= ih3;
+
+  p->primitives.gradients.v[0][0] *= ih3;
+  p->primitives.gradients.v[0][1] *= ih3;
+  p->primitives.gradients.v[0][2] *= ih3;
+  p->primitives.gradients.v[1][0] *= ih3;
+  p->primitives.gradients.v[1][1] *= ih3;
+  p->primitives.gradients.v[1][2] *= ih3;
+  p->primitives.gradients.v[2][0] *= ih3;
+  p->primitives.gradients.v[2][1] *= ih3;
+  p->primitives.gradients.v[2][2] *= ih3;
+
+  p->primitives.gradients.P[0] *= ih3;
+  p->primitives.gradients.P[1] *= ih3;
+  p->primitives.gradients.P[2] *= ih3;
+
+/* slope limiter */
+#ifdef SLOPE_LIMITER
+  gradrho[0] = p->primitives.gradients.rho[0];
+  gradrho[1] = p->primitives.gradients.rho[1];
+  gradrho[2] = p->primitives.gradients.rho[2];
+
+  gradv[0][0] = p->primitives.gradients.v[0][0];
+  gradv[0][1] = p->primitives.gradients.v[0][1];
+  gradv[0][2] = p->primitives.gradients.v[0][2];
+
+  gradv[1][0] = p->primitives.gradients.v[1][0];
+  gradv[1][1] = p->primitives.gradients.v[1][1];
+  gradv[1][2] = p->primitives.gradients.v[1][2];
+
+  gradv[2][0] = p->primitives.gradients.v[2][0];
+  gradv[2][1] = p->primitives.gradients.v[2][1];
+  gradv[2][2] = p->primitives.gradients.v[2][2];
+
+  gradP[0] = p->primitives.gradients.P[0];
+  gradP[1] = p->primitives.gradients.P[1];
+  gradP[2] = p->primitives.gradients.P[2];
+
+  gradtrue = gradrho[0] * gradrho[0] + gradrho[1] * gradrho[1] +
+             gradrho[2] * gradrho[2];
+  /* gradtrue might be zero. In this case, there is no gradient and we don't
+     need to slope limit anything... */
+  if (gradtrue) {
+    gradtrue = sqrtf(gradtrue);
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.rho[1] - p->primitives.rho;
+    gradmin = p->primitives.rho - p->primitives.limiter.rho[0];
+    /* gradmin and gradmax might be negative if the value of the current
+       particle is larger/smaller than all neighbouring values */
+    gradmax = fabs(gradmax);
+    gradmin = fabs(gradmin);
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.rho[0] *= alpha;
+    p->primitives.gradients.rho[1] *= alpha;
+    p->primitives.gradients.rho[2] *= alpha;
+  }
+
+  gradtrue = gradv[0][0] * gradv[0][0] + gradv[0][1] * gradv[0][1] +
+             gradv[0][2] * gradv[0][2];
+  if (gradtrue) {
+    gradtrue = sqrtf(gradtrue);
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[0][1] - p->primitives.v[0];
+    gradmin = p->primitives.v[0] - p->primitives.limiter.v[0][0];
+    gradmax = fabs(gradmax);
+    gradmin = fabs(gradmin);
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[0][0] *= alpha;
+    p->primitives.gradients.v[0][1] *= alpha;
+    p->primitives.gradients.v[0][2] *= alpha;
+  }
+
+  gradtrue = gradv[1][0] * gradv[1][0] + gradv[1][1] * gradv[1][1] +
+             gradv[1][2] * gradv[1][2];
+  if (gradtrue) {
+    gradtrue = sqrtf(gradtrue);
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[1][1] - p->primitives.v[1];
+    gradmin = p->primitives.v[1] - p->primitives.limiter.v[1][0];
+    gradmax = fabs(gradmax);
+    gradmin = fabs(gradmin);
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[1][0] *= alpha;
+    p->primitives.gradients.v[1][1] *= alpha;
+    p->primitives.gradients.v[1][2] *= alpha;
+  }
+
+  gradtrue = gradv[2][0] * gradv[2][0] + gradv[2][1] * gradv[2][1] +
+             gradv[2][2] * gradv[2][2];
+  if (gradtrue) {
+    gradtrue = sqrtf(gradtrue);
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.v[2][1] - p->primitives.v[2];
+    gradmin = p->primitives.v[2] - p->primitives.limiter.v[2][0];
+    gradmax = fabs(gradmax);
+    gradmin = fabs(gradmin);
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.v[2][0] *= alpha;
+    p->primitives.gradients.v[2][1] *= alpha;
+    p->primitives.gradients.v[2][2] *= alpha;
+  }
+
+  gradtrue = gradP[0] * gradP[0] + gradP[1] * gradP[1] + gradP[2] * gradP[2];
+  if (gradtrue) {
+    gradtrue = sqrtf(gradtrue);
+    gradtrue *= p->primitives.limiter.maxr;
+    gradmax = p->primitives.limiter.P[1] - p->primitives.P;
+    gradmin = p->primitives.P - p->primitives.limiter.P[0];
+    gradmax = fabs(gradmax);
+    gradmin = fabs(gradmin);
+    alpha = fmin(1.0f, fmin(gradmax / gradtrue, gradmin / gradtrue));
+    p->primitives.gradients.P[0] *= alpha;
+    p->primitives.gradients.P[1] *= alpha;
+    p->primitives.gradients.P[2] *= alpha;
+  }
+#endif  // SLOPE_LIMITER
+
+#endif  // SPH_GRADIENTS
+}
+
+/**
+ * @brief Prepare a particle for the fluxes calculation.
+ *
+ * @param p The particle to act upon
+ * @param xp The extended particle data to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_prepare_fluxes(struct part* p, struct xpart* xp) {
+
+  /* initialize variables used for timestep calculation */
+  p->timestepvars.vmax = 0.0f;
+}
+
+/**
+ * @brief Reset acceleration fields of a particle
+ *
+ * Resets all hydro acceleration and time derivative fields in preparation
+ * for the sums taking place in the variaous force tasks
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_reset_acceleration(struct part* p) {
+
+  /* figure out what to put here */
+}
+
+/**
+ * @brief Finishes the fluxes calculation.
+ *
+ * Multiplies the forces and accelerationsby the appropiate constants
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_end_fluxes(struct part* p) {
+
+  /* do nothing */
+}
+
+/**
+ * @brief Converts hydro quantity of a particle
+ *
+ * Requires the volume to be known
+ *
+ * @param p The particle to act upon
+ */
+__attribute__((always_inline))
+    INLINE static void hydro_convert_quantities(struct part* p) {
+
+  float volume;
+  GFLOAT m;
+  GFLOAT momentum[3];
+#ifndef THERMAL_ENERGY
+  GFLOAT momentum2;
+#endif
+  volume = p->geometry.volume;
+
+  /* set hydro velocities */
+  p->primitives.v[0] = p->v[0];
+  p->primitives.v[1] = p->v[1];
+  p->primitives.v[2] = p->v[2];
+  /* P actually contains internal energy at this point */
+  p->primitives.P *= (const_hydro_gamma - 1.) * p->primitives.rho;
+
+  p->conserved.mass = m = p->primitives.rho * volume;
+  p->conserved.momentum[0] = momentum[0] = m * p->primitives.v[0];
+  p->conserved.momentum[1] = momentum[1] = m * p->primitives.v[1];
+  p->conserved.momentum[2] = momentum[2] = m * p->primitives.v[2];
+#ifndef THERMAL_ENERGY
+  momentum2 = momentum[0] * momentum[0] + momentum[1] * momentum[1] +
+              momentum[2] * momentum[2];
+  p->conserved.energy =
+      p->primitives.P / (const_hydro_gamma - 1.) * volume + 0.5 * momentum2 / m;
+#else
+  p->conserved.energy = p->primitives.P / (const_hydro_gamma - 1.) * volume;
+#endif
+}
+
+// MATTHIEU
+__attribute__((always_inline))
+    INLINE static void hydro_end_density(struct part* p, float time) {}
+__attribute__((always_inline)) INLINE static void hydro_prepare_force(
+    struct part* p, struct xpart* xp, int ti_current, double timeBase) {}
+__attribute__((always_inline)) INLINE static void hydro_predict_extra(
+    struct part* p, struct xpart* xp, int t0, int t1, double timeBase) {}
+__attribute__((always_inline))
+    INLINE static void hydro_end_force(struct part* p) {}
+__attribute__((always_inline)) INLINE static void hydro_kick_extra(
+    struct part* p, struct xpart* xp, float dt, float half_dt) {}
+__attribute__((always_inline))
+    INLINE static float hydro_get_internal_energy(struct part* p) {
+  return 0.f;
+}
diff --git a/src/hydro/Gizmo/hydro_debug.h b/src/hydro/Gizmo/hydro_debug.h
new file mode 100644
index 0000000000000000000000000000000000000000..2cc957ed883436ce57e9d53d00a073693c9495df
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_debug.h
@@ -0,0 +1,27 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+__attribute__((always_inline))
+    INLINE static void hydro_debug_particle(struct part* p, struct xpart* xp) {
+  printf(
+      "x=[%.16e,%.16e,%.16e], "
+      "v=[%.3e,%.3e,%.3e], a=[%.3e,%.3e,%.3e], volume=%.3e\n",
+      p->x[0], p->x[1], p->x[2], p->v[0], p->v[1], p->v[2], p->a_hydro[0],
+      p->a_hydro[1], p->a_hydro[2], p->geometry.volume);
+}
diff --git a/src/hydro/Gizmo/hydro_iact.h b/src/hydro/Gizmo/hydro_iact.h
new file mode 100644
index 0000000000000000000000000000000000000000..4fe875d3d07315051ef8b3051665a9ea0ef261b8
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_iact.h
@@ -0,0 +1,1035 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "riemann.h"
+
+#define USE_GRADIENTS
+#define PER_FACE_LIMITER
+/* #define PRINT_ID 0 */
+
+/* this corresponds to task_subtype_hydro_loop1 */
+__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop1(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
+
+  float r = sqrtf(r2);
+  float xi, xj;
+  float h_inv;
+  float wi, wj, wi_dx, wj_dx;
+  int k, l;
+
+  /* Compute density of pi. */
+  h_inv = 1.0 / hi;
+  xi = r * h_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  pi->density.wcount += wi;
+  pi->density.wcount_dh -= xi * wi_dx;
+
+  /* these are eqns. (1) and (2) in the summary */
+  pi->geometry.volume += wi;
+  for (k = 0; k < 3; k++)
+    for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
+
+#ifdef SPH_GRADIENTS
+  /* very basic gradient estimate */
+  pi->primitives.gradients.rho[0] -=
+      wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pi->primitives.gradients.rho[1] -=
+      wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pi->primitives.gradients.rho[2] -=
+      wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+  pi->primitives.gradients.v[0][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pi->primitives.gradients.v[0][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pi->primitives.gradients.v[0][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+
+  pi->primitives.gradients.v[1][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pi->primitives.gradients.v[1][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pi->primitives.gradients.v[1][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+
+  pi->primitives.gradients.v[2][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pi->primitives.gradients.v[2][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pi->primitives.gradients.v[2][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+  pi->primitives.gradients.P[0] -=
+      wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+  pi->primitives.gradients.P[1] -=
+      wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+  pi->primitives.gradients.P[2] -=
+      wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+  /* basic slope limiter: collect the maximal and the minimal value for the
+   * primitive variables among the ngbs */
+  pi->primitives.limiter.rho[0] =
+      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
+  pi->primitives.limiter.rho[1] =
+      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
+
+  pi->primitives.limiter.v[0][0] =
+      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
+  pi->primitives.limiter.v[0][1] =
+      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
+  pi->primitives.limiter.v[1][0] =
+      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
+  pi->primitives.limiter.v[1][1] =
+      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
+  pi->primitives.limiter.v[2][0] =
+      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
+  pi->primitives.limiter.v[2][1] =
+      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
+
+  pi->primitives.limiter.P[0] =
+      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
+  pi->primitives.limiter.P[1] =
+      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
+
+  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+#endif
+
+  /* Compute density of pj. */
+  h_inv = 1.0 / hj;
+  xj = r * h_inv;
+  kernel_deval(xj, &wj, &wj_dx);
+
+  pj->density.wcount += wj;
+  pj->density.wcount_dh -= xj * wj_dx;
+
+  /* these are eqns. (1) and (2) in the summary */
+  pj->geometry.volume += wj;
+  for (k = 0; k < 3; k++)
+    for (l = 0; l < 3; l++) pj->geometry.matrix_E[k][l] += dx[k] * dx[l] * wj;
+
+#ifdef SPH_GRADIENTS
+  /* very basic gradient estimate */
+  /* signs are the same as before, since we swap i and j twice */
+  pj->primitives.gradients.rho[0] -=
+      wj_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pj->primitives.gradients.rho[1] -=
+      wj_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pj->primitives.gradients.rho[2] -=
+      wj_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+  pj->primitives.gradients.v[0][0] -=
+      wj_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pj->primitives.gradients.v[0][1] -=
+      wj_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pj->primitives.gradients.v[0][2] -=
+      wj_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+
+  pj->primitives.gradients.v[1][0] -=
+      wj_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pj->primitives.gradients.v[1][1] -=
+      wj_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pj->primitives.gradients.v[1][2] -=
+      wj_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+
+  pj->primitives.gradients.v[2][0] -=
+      wj_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pj->primitives.gradients.v[2][1] -=
+      wj_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pj->primitives.gradients.v[2][2] -=
+      wj_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+  pj->primitives.gradients.P[0] -=
+      wj_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+  pj->primitives.gradients.P[1] -=
+      wj_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+  pj->primitives.gradients.P[2] -=
+      wj_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+  /* basic slope limiter: collect the maximal and the minimal value for the
+   * primitive variables among the ngbs */
+  pj->primitives.limiter.rho[0] =
+      fmin(pi->primitives.rho, pj->primitives.limiter.rho[0]);
+  pj->primitives.limiter.rho[1] =
+      fmax(pi->primitives.rho, pj->primitives.limiter.rho[1]);
+
+  pj->primitives.limiter.v[0][0] =
+      fmin(pi->primitives.v[0], pj->primitives.limiter.v[0][0]);
+  pj->primitives.limiter.v[0][1] =
+      fmax(pi->primitives.v[0], pj->primitives.limiter.v[0][1]);
+  pj->primitives.limiter.v[1][0] =
+      fmin(pi->primitives.v[1], pj->primitives.limiter.v[1][0]);
+  pj->primitives.limiter.v[1][1] =
+      fmax(pi->primitives.v[1], pj->primitives.limiter.v[1][1]);
+  pj->primitives.limiter.v[2][0] =
+      fmin(pi->primitives.v[2], pj->primitives.limiter.v[2][0]);
+  pj->primitives.limiter.v[2][1] =
+      fmax(pi->primitives.v[2], pj->primitives.limiter.v[2][1]);
+
+  pj->primitives.limiter.P[0] =
+      fmin(pi->primitives.P, pj->primitives.limiter.P[0]);
+  pj->primitives.limiter.P[1] =
+      fmax(pi->primitives.P, pj->primitives.limiter.P[1]);
+
+  pj->primitives.limiter.maxr = fmax(r, pj->primitives.limiter.maxr);
+#endif
+}
+
+/* this corresponds to task_subtype_hydro_loop1 */
+__attribute__((always_inline))
+    INLINE static void runner_iact_nonsym_hydro_loop1(float r2, float *dx,
+                                                      float hi, float hj,
+                                                      struct part *pi,
+                                                      struct part *pj) {
+
+  float r;
+  float xi;
+  float h_inv;
+  float wi, wi_dx;
+  int k, l;
+
+  /* Get r and r inverse. */
+  r = sqrtf(r2);
+
+  h_inv = 1.0 / hi;
+  xi = r * h_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  pi->density.wcount += wi;
+  pi->density.wcount_dh -= xi * wi_dx;
+
+  /* these are eqns. (1) and (2) in the summary */
+  pi->geometry.volume += wi;
+  for (k = 0; k < 3; k++)
+    for (l = 0; l < 3; l++) pi->geometry.matrix_E[k][l] += dx[k] * dx[l] * wi;
+
+#ifdef SPH_GRADIENTS
+  /* very basic gradient estimate */
+  pi->primitives.gradients.rho[0] -=
+      wi_dx * dx[0] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pi->primitives.gradients.rho[1] -=
+      wi_dx * dx[1] * (pi->primitives.rho - pj->primitives.rho) / r;
+  pi->primitives.gradients.rho[2] -=
+      wi_dx * dx[2] * (pi->primitives.rho - pj->primitives.rho) / r;
+
+  pi->primitives.gradients.v[0][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pi->primitives.gradients.v[0][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+  pi->primitives.gradients.v[0][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[0] - pj->primitives.v[0]) / r;
+
+  pi->primitives.gradients.v[1][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pi->primitives.gradients.v[1][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+  pi->primitives.gradients.v[1][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[1] - pj->primitives.v[1]) / r;
+
+  pi->primitives.gradients.v[2][0] -=
+      wi_dx * dx[0] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pi->primitives.gradients.v[2][1] -=
+      wi_dx * dx[1] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+  pi->primitives.gradients.v[2][2] -=
+      wi_dx * dx[2] * (pi->primitives.v[2] - pj->primitives.v[2]) / r;
+
+  pi->primitives.gradients.P[0] -=
+      wi_dx * dx[0] * (pi->primitives.P - pj->primitives.P) / r;
+  pi->primitives.gradients.P[1] -=
+      wi_dx * dx[1] * (pi->primitives.P - pj->primitives.P) / r;
+  pi->primitives.gradients.P[2] -=
+      wi_dx * dx[2] * (pi->primitives.P - pj->primitives.P) / r;
+
+  /* slope limiter */
+  pi->primitives.limiter.rho[0] =
+      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
+  pi->primitives.limiter.rho[1] =
+      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
+
+  pi->primitives.limiter.v[0][0] =
+      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
+  pi->primitives.limiter.v[0][1] =
+      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
+  pi->primitives.limiter.v[1][0] =
+      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
+  pi->primitives.limiter.v[1][1] =
+      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
+  pi->primitives.limiter.v[2][0] =
+      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
+  pi->primitives.limiter.v[2][1] =
+      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
+
+  pi->primitives.limiter.P[0] =
+      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
+  pi->primitives.limiter.P[1] =
+      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
+
+  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+#endif
+}
+
+__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop2(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
+
+#ifndef SPH_GRADIENTS
+
+  float r = sqrtf(r2);
+  float xi, xj;
+  float hi_inv, hj_inv;
+  float wi, wj, wi_dx, wj_dx;
+  int k, l;
+  float Bi[3][3];
+  float Bj[3][3];
+  GFLOAT Wi[5], Wj[5];
+
+  /* Initialize local variables */
+  for (k = 0; k < 3; k++) {
+    for (l = 0; l < 3; l++) {
+      Bi[k][l] = pi->geometry.matrix_E[k][l];
+      Bj[k][l] = pj->geometry.matrix_E[k][l];
+    }
+  }
+  Wi[0] = pi->primitives.rho;
+  Wi[1] = pi->primitives.v[0];
+  Wi[2] = pi->primitives.v[1];
+  Wi[3] = pi->primitives.v[2];
+  Wi[4] = pi->primitives.P;
+  Wj[0] = pj->primitives.rho;
+  Wj[1] = pj->primitives.v[0];
+  Wj[2] = pj->primitives.v[1];
+  Wj[3] = pj->primitives.v[2];
+  Wj[4] = pj->primitives.P;
+
+  /* Compute kernel of pi. */
+  hi_inv = 1.0 / hi;
+  xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  /* Compute gradients for pi */
+  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+   * definition of dx */
+  pi->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  /* basic slope limiter: collect the maximal and the minimal value for the
+   * primitive variables among the ngbs */
+  pi->primitives.limiter.rho[0] =
+      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
+  pi->primitives.limiter.rho[1] =
+      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
+
+  pi->primitives.limiter.v[0][0] =
+      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
+  pi->primitives.limiter.v[0][1] =
+      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
+  pi->primitives.limiter.v[1][0] =
+      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
+  pi->primitives.limiter.v[1][1] =
+      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
+  pi->primitives.limiter.v[2][0] =
+      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
+  pi->primitives.limiter.v[2][1] =
+      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
+
+  pi->primitives.limiter.P[0] =
+      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
+  pi->primitives.limiter.P[1] =
+      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
+
+  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+
+  /* Compute kernel of pj. */
+  hj_inv = 1.0 / hj;
+  xj = r * hj_inv;
+  kernel_deval(xj, &wj, &wj_dx);
+
+  /* Compute gradients for pj */
+  /* there is no sign difference w.r.t. eqn. (6) because dx is now what we want
+   * it to be */
+  pj->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  pj->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  pj->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+  pj->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  pj->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[0][0] * dx[0] + Bj[0][1] * dx[1] + Bj[0][2] * dx[2]);
+  pj->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[1][0] * dx[0] + Bj[1][1] * dx[1] + Bj[1][2] * dx[2]);
+  pj->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wj *
+      (Bj[2][0] * dx[0] + Bj[2][1] * dx[1] + Bj[2][2] * dx[2]);
+
+  /* basic slope limiter: collect the maximal and the minimal value for the
+   * primitive variables among the ngbs */
+  pj->primitives.limiter.rho[0] =
+      fmin(pi->primitives.rho, pj->primitives.limiter.rho[0]);
+  pj->primitives.limiter.rho[1] =
+      fmax(pi->primitives.rho, pj->primitives.limiter.rho[1]);
+
+  pj->primitives.limiter.v[0][0] =
+      fmin(pi->primitives.v[0], pj->primitives.limiter.v[0][0]);
+  pj->primitives.limiter.v[0][1] =
+      fmax(pi->primitives.v[0], pj->primitives.limiter.v[0][1]);
+  pj->primitives.limiter.v[1][0] =
+      fmin(pi->primitives.v[1], pj->primitives.limiter.v[1][0]);
+  pj->primitives.limiter.v[1][1] =
+      fmax(pi->primitives.v[1], pj->primitives.limiter.v[1][1]);
+  pj->primitives.limiter.v[2][0] =
+      fmin(pi->primitives.v[2], pj->primitives.limiter.v[2][0]);
+  pj->primitives.limiter.v[2][1] =
+      fmax(pi->primitives.v[2], pj->primitives.limiter.v[2][1]);
+
+  pj->primitives.limiter.P[0] =
+      fmin(pi->primitives.P, pj->primitives.limiter.P[0]);
+  pj->primitives.limiter.P[1] =
+      fmax(pi->primitives.P, pj->primitives.limiter.P[1]);
+
+  pj->primitives.limiter.maxr = fmax(r, pj->primitives.limiter.maxr);
+
+#endif
+}
+
+__attribute__((always_inline))
+    INLINE static void runner_iact_nonsym_hydro_loop2(float r2, float *dx,
+                                                      float hi, float hj,
+                                                      struct part *pi,
+                                                      struct part *pj) {
+
+#ifndef SPH_GRADIENTS
+
+  float r = sqrtf(r2);
+  float xi;
+  float hi_inv;
+  float wi, wi_dx;
+  int k, l;
+  float Bi[3][3];
+  GFLOAT Wi[5], Wj[5];
+
+  /* Initialize local variables */
+  for (k = 0; k < 3; k++) {
+    for (l = 0; l < 3; l++) {
+      Bi[k][l] = pi->geometry.matrix_E[k][l];
+    }
+  }
+  Wi[0] = pi->primitives.rho;
+  Wi[1] = pi->primitives.v[0];
+  Wi[2] = pi->primitives.v[1];
+  Wi[3] = pi->primitives.v[2];
+  Wi[4] = pi->primitives.P;
+  Wj[0] = pj->primitives.rho;
+  Wj[1] = pj->primitives.v[0];
+  Wj[2] = pj->primitives.v[1];
+  Wj[3] = pj->primitives.v[2];
+  Wj[4] = pj->primitives.P;
+
+  /* Compute kernel of pi. */
+  hi_inv = 1.0 / hi;
+  xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  /* Compute gradients for pi */
+  /* there is a sign difference w.r.t. eqn. (6) because of the inverse
+   * definition of dx */
+  pi->primitives.gradients.rho[0] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.rho[1] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.rho[2] +=
+      (Wi[0] - Wj[0]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.v[0][0] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[0][1] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[0][2] +=
+      (Wi[1] - Wj[1]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[1][0] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[1][1] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[1][2] +=
+      (Wi[2] - Wj[2]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+  pi->primitives.gradients.v[2][0] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.v[2][1] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.v[2][2] +=
+      (Wi[3] - Wj[3]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  pi->primitives.gradients.P[0] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[0][0] * dx[0] + Bi[0][1] * dx[1] + Bi[0][2] * dx[2]);
+  pi->primitives.gradients.P[1] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[1][0] * dx[0] + Bi[1][1] * dx[1] + Bi[1][2] * dx[2]);
+  pi->primitives.gradients.P[2] +=
+      (Wi[4] - Wj[4]) * wi *
+      (Bi[2][0] * dx[0] + Bi[2][1] * dx[1] + Bi[2][2] * dx[2]);
+
+  /* slope limiter */
+  pi->primitives.limiter.rho[0] =
+      fmin(pj->primitives.rho, pi->primitives.limiter.rho[0]);
+  pi->primitives.limiter.rho[1] =
+      fmax(pj->primitives.rho, pi->primitives.limiter.rho[1]);
+
+  pi->primitives.limiter.v[0][0] =
+      fmin(pj->primitives.v[0], pi->primitives.limiter.v[0][0]);
+  pi->primitives.limiter.v[0][1] =
+      fmax(pj->primitives.v[0], pi->primitives.limiter.v[0][1]);
+  pi->primitives.limiter.v[1][0] =
+      fmin(pj->primitives.v[1], pi->primitives.limiter.v[1][0]);
+  pi->primitives.limiter.v[1][1] =
+      fmax(pj->primitives.v[1], pi->primitives.limiter.v[1][1]);
+  pi->primitives.limiter.v[2][0] =
+      fmin(pj->primitives.v[2], pi->primitives.limiter.v[2][0]);
+  pi->primitives.limiter.v[2][1] =
+      fmax(pj->primitives.v[2], pi->primitives.limiter.v[2][1]);
+
+  pi->primitives.limiter.P[0] =
+      fmin(pj->primitives.P, pi->primitives.limiter.P[0]);
+  pi->primitives.limiter.P[1] =
+      fmax(pj->primitives.P, pi->primitives.limiter.P[1]);
+
+  pi->primitives.limiter.maxr = fmax(r, pi->primitives.limiter.maxr);
+
+#endif
+}
+
+__attribute__((always_inline)) INLINE static void runner_iact_fluxes_common(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj,
+    int mode) {
+
+  float r = sqrtf(r2);
+  float xi, xj;
+  float hi_inv, hi_inv2;
+  float hj_inv, hj_inv2;
+  float wi, wj, wi_dx, wj_dx;
+  int k, l;
+  float A[3];
+  float Anorm;
+  float Bi[3][3];
+  float Bj[3][3];
+  float Vi, Vj;
+  float xij_i[3], xfac, xijdotdx;
+  float vmax, dvdotdx;
+  float vi[3], vj[3], vij[3];
+  GFLOAT Wi[5], Wj[5];  //, Whalf[5];
+#ifdef USE_GRADIENTS
+  GFLOAT dWi[5], dWj[5];
+  float xij_j[3];
+#endif
+  //    GFLOAT rhoe;
+  //    GFLOAT flux[5][3];
+  float dti, dtj, mindt;
+  float n_unit[3];
+
+  /* Initialize local variables */
+  for (k = 0; k < 3; k++) {
+    for (l = 0; l < 3; l++) {
+      Bi[k][l] = pi->geometry.matrix_E[k][l];
+      Bj[k][l] = pj->geometry.matrix_E[k][l];
+    }
+    vi[k] = pi->v[k]; /* particle velocities */
+    vj[k] = pj->v[k];
+  }
+  Vi = pi->geometry.volume;
+  Vj = pj->geometry.volume;
+  Wi[0] = pi->primitives.rho;
+  Wi[1] = pi->primitives.v[0];
+  Wi[2] = pi->primitives.v[1];
+  Wi[3] = pi->primitives.v[2];
+  Wi[4] = pi->primitives.P;
+  Wj[0] = pj->primitives.rho;
+  Wj[1] = pj->primitives.v[0];
+  Wj[2] = pj->primitives.v[1];
+  Wj[3] = pj->primitives.v[2];
+  Wj[4] = pj->primitives.P;
+  dti = pi->ti_end - pi->ti_begin;  // MATTHIEU
+  dtj = pj->ti_end - pj->ti_begin;
+
+  //    if(dti > 1.e-7 || dtj > 1.e-7){
+  //        message("Timestep too large: %g %g!", dti, dtj);
+  //    }
+
+  /* calculate the maximal signal velocity */
+  vmax = sqrtf(const_hydro_gamma * Wi[4] / Wi[0]) +
+         sqrtf(const_hydro_gamma * Wj[4] / Wj[0]);
+  dvdotdx = (Wi[1] - Wj[1]) * dx[0] + (Wi[2] - Wj[2]) * dx[1] +
+            (Wi[3] - Wj[3]) * dx[2];
+  if (dvdotdx > 0.) {
+    vmax -= dvdotdx / r;
+  }
+  pi->timestepvars.vmax = fmaxf(pi->timestepvars.vmax, vmax);
+  if (mode == 1) {
+    pj->timestepvars.vmax = fmaxf(pj->timestepvars.vmax, vmax);
+  }
+
+  /* The flux will be exchanged using the smallest time step of the two
+   * particles */
+  mindt = fminf(dti, dtj);
+
+  /* Compute kernel of pi. */
+  hi_inv = 1.0 / hi;
+  hi_inv2 = hi_inv * hi_inv;
+  xi = r * hi_inv;
+  kernel_deval(xi, &wi, &wi_dx);
+
+  /* Compute kernel of pj. */
+  hj_inv = 1.0 / hj;
+  hj_inv2 = hj_inv * hj_inv;
+  xj = r * hj_inv;
+  kernel_deval(xj, &wj, &wj_dx);
+
+  /* Compute area */
+  /* eqn. (7) */
+  Anorm = 0.0f;
+  for (k = 0; k < 3; k++) {
+    /* we add a minus sign since dx is pi->x - pj->x */
+    A[k] = -Vi * (Bi[k][0] * dx[0] + Bi[k][1] * dx[1] + Bi[k][2] * dx[2]) * wi *
+               hi_inv * hi_inv2 -
+           Vj * (Bj[k][0] * dx[0] + Bj[k][1] * dx[1] + Bj[k][2] * dx[2]) * wj *
+               hj_inv * hj_inv2;
+    Anorm += A[k] * A[k];
+  }
+
+  if (!Anorm) {
+    /* if the interface has no area, nothing happens and we return */
+    /* continuing results in dividing by zero and NaN's... */
+    return;
+  }
+
+  /* compute the normal vector of the interface */
+  Anorm = sqrtf(Anorm);
+  for (k = 0; k < 3; k++) n_unit[k] = A[k] / Anorm;
+
+#ifdef PRINT_ID
+  if (pi->id == PRINT_ID || pj->id == PRINT_ID) {
+    printf("pi: %g %g %g\npj: %g %g %g\nA = %g %g %g\n", pi->x[0], pi->x[1],
+           pi->x[2], pj->x[0], pj->x[1], pj->x[2], A[0], A[1], A[2]);
+  }
+#endif
+
+  /* Compute interface position (relative to pi, since we don't need the actual
+   * position) */
+  /* eqn. (8) */
+  xfac = hi / (hi + hj);
+  for (k = 0; k < 3; k++) xij_i[k] = -xfac * dx[k];
+
+  /* Compute interface velocity */
+  /* eqn. (9) */
+  xijdotdx = xij_i[0] * dx[0] + xij_i[1] * dx[1] + xij_i[2] * dx[2];
+  for (k = 0; k < 3; k++) vij[k] = vi[k] + (vi[k] - vj[k]) * xijdotdx / r2;
+
+  /* complete calculation of position of interface */
+  /* NOTE: dx is not necessarily just pi->x - pj->x but can also contain
+           correction terms for periodicity. If we do the interpolation,
+           we have to use xij w.r.t. the actual particle.
+           => we need a separate xij for pi and pj... */
+  /* tldr: we do not need the code below, but we do need the same code as above
+     but then
+     with i and j swapped */
+  //    for ( k = 0 ; k < 3 ; k++ )
+  //      xij[k] += pi->x[k];
+
+  /* Boost the primitive variables to the frame of reference of the interface */
+  /* Note that velocities are indices 1-3 in W */
+  Wi[1] -= vij[0];
+  Wi[2] -= vij[1];
+  Wi[3] -= vij[2];
+  Wj[1] -= vij[0];
+  Wj[2] -= vij[1];
+  Wj[3] -= vij[2];
+
+#ifdef USE_GRADIENTS
+  /* perform gradient reconstruction in space and time */
+  /* space */
+  /* Compute interface position (relative to pj, since we don't need the actual
+   * position) */
+  /* eqn. (8) */
+  xfac = hj / (hi + hj);
+  for (k = 0; k < 3; k++) xij_j[k] = xfac * dx[k];
+
+  dWi[0] = pi->primitives.gradients.rho[0] * xij_i[0] +
+           pi->primitives.gradients.rho[1] * xij_i[1] +
+           pi->primitives.gradients.rho[2] * xij_i[2];
+  dWi[1] = pi->primitives.gradients.v[0][0] * xij_i[0] +
+           pi->primitives.gradients.v[0][1] * xij_i[1] +
+           pi->primitives.gradients.v[0][2] * xij_i[2];
+  dWi[2] = pi->primitives.gradients.v[1][0] * xij_i[0] +
+           pi->primitives.gradients.v[1][1] * xij_i[1] +
+           pi->primitives.gradients.v[1][2] * xij_i[2];
+  dWi[3] = pi->primitives.gradients.v[2][0] * xij_i[0] +
+           pi->primitives.gradients.v[2][1] * xij_i[1] +
+           pi->primitives.gradients.v[2][2] * xij_i[2];
+  dWi[4] = pi->primitives.gradients.P[0] * xij_i[0] +
+           pi->primitives.gradients.P[1] * xij_i[1] +
+           pi->primitives.gradients.P[2] * xij_i[2];
+
+  dWj[0] = pj->primitives.gradients.rho[0] * xij_j[0] +
+           pj->primitives.gradients.rho[1] * xij_j[1] +
+           pj->primitives.gradients.rho[2] * xij_j[2];
+  dWj[1] = pj->primitives.gradients.v[0][0] * xij_j[0] +
+           pj->primitives.gradients.v[0][1] * xij_j[1] +
+           pj->primitives.gradients.v[0][2] * xij_j[2];
+  dWj[2] = pj->primitives.gradients.v[1][0] * xij_j[0] +
+           pj->primitives.gradients.v[1][1] * xij_j[1] +
+           pj->primitives.gradients.v[1][2] * xij_j[2];
+  dWj[3] = pj->primitives.gradients.v[2][0] * xij_j[0] +
+           pj->primitives.gradients.v[2][1] * xij_j[1] +
+           pj->primitives.gradients.v[2][2] * xij_j[2];
+  dWj[4] = pj->primitives.gradients.P[0] * xij_j[0] +
+           pj->primitives.gradients.P[1] * xij_j[1] +
+           pj->primitives.gradients.P[2] * xij_j[2];
+
+#ifdef PER_FACE_LIMITER
+
+  float xij_i_norm;
+  GFLOAT phi_i, phi_j;
+  GFLOAT delta1, delta2;
+  GFLOAT phiminus, phiplus;
+  GFLOAT phimin, phimax;
+  GFLOAT phibar;
+  /* free parameters, values from Hopkins */
+  GFLOAT psi1 = 0.5, psi2 = 0.25;
+  GFLOAT phi_mid0, phi_mid;
+
+  for (k = 0; k < 10; k++) {
+    if (k < 5) {
+      phi_i = Wi[k];
+      phi_j = Wj[k];
+      phi_mid0 = Wi[k] + dWi[k];
+      xij_i_norm = sqrtf(xij_i[0] * xij_i[0] + xij_i[1] * xij_i[1] +
+                         xij_i[2] * xij_i[2]);
+    } else {
+      phi_i = Wj[k - 5];
+      phi_j = Wi[k - 5];
+      phi_mid0 = Wj[k - 5] + dWj[k - 5];
+      xij_i_norm = sqrtf(xij_j[0] * xij_j[0] + xij_j[1] * xij_j[1] +
+                         xij_j[2] * xij_j[2]);
+    }
+
+    delta1 = psi1 * fabs(phi_i - phi_j);
+    delta2 = psi2 * fabs(phi_i - phi_j);
+
+    phimin = fmin(phi_i, phi_j);
+    phimax = fmax(phi_i, phi_j);
+
+    phibar = phi_i + xij_i_norm / r * (phi_j - phi_i);
+
+    /* if sign(phimax+delta1) == sign(phimax) */
+    if ((phimax + delta1) * phimax > 0.0f) {
+      phiplus = phimax + delta1;
+    } else {
+      phiplus = phimax / (1.0f + delta1 / fabs(phimax));
+    }
+
+    /* if sign(phimin-delta1) == sign(phimin) */
+    if ((phimin - delta1) * phimin > 0.0f) {
+      phiminus = phimin - delta1;
+    } else {
+      phiminus = phimin / (1.0f + delta1 / fabs(phimin));
+    }
+
+    if (phi_i == phi_j) {
+      phi_mid = phi_i;
+    } else {
+      if (phi_i < phi_j) {
+        phi_mid = fmax(phiminus, fmin(phibar + delta2, phi_mid0));
+      } else {
+        phi_mid = fmin(phiplus, fmax(phibar - delta2, phi_mid0));
+      }
+    }
+
+    if (k < 5) {
+      dWi[k] = phi_mid - phi_i;
+    } else {
+      dWj[k - 5] = phi_mid - phi_i;
+    }
+  }
+
+#endif
+
+  //    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
+  //    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
+
+  /* time */
+  dWi[0] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.rho[0] +
+                           Wi[2] * pi->primitives.gradients.rho[1] +
+                           Wi[3] * pi->primitives.gradients.rho[2] +
+                           Wi[0] * (pi->primitives.gradients.v[0][0] +
+                                    pi->primitives.gradients.v[1][1] +
+                                    pi->primitives.gradients.v[2][2]));
+  dWi[1] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[0][0] +
+                           Wi[2] * pi->primitives.gradients.v[0][1] +
+                           Wi[3] * pi->primitives.gradients.v[0][2] +
+                           pi->primitives.gradients.P[0] / Wi[0]);
+  dWi[2] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[1][0] +
+                           Wi[2] * pi->primitives.gradients.v[1][1] +
+                           Wi[3] * pi->primitives.gradients.v[1][2] +
+                           pi->primitives.gradients.P[1] / Wi[0]);
+  dWi[3] -= 0.5 * mindt * (Wi[1] * pi->primitives.gradients.v[2][0] +
+                           Wi[2] * pi->primitives.gradients.v[2][1] +
+                           Wi[3] * pi->primitives.gradients.v[2][2] +
+                           pi->primitives.gradients.P[2] / Wi[0]);
+  dWi[4] -= 0.5 * mindt *
+            (Wi[1] * pi->primitives.gradients.P[0] +
+             Wi[2] * pi->primitives.gradients.P[1] +
+             Wi[3] * pi->primitives.gradients.P[2] +
+             const_hydro_gamma * Wi[4] * (pi->primitives.gradients.v[0][0] +
+                                          pi->primitives.gradients.v[1][1] +
+                                          pi->primitives.gradients.v[2][2]));
+
+  dWj[0] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.rho[0] +
+                           Wj[2] * pj->primitives.gradients.rho[1] +
+                           Wj[3] * pj->primitives.gradients.rho[2] +
+                           Wj[0] * (pj->primitives.gradients.v[0][0] +
+                                    pj->primitives.gradients.v[1][1] +
+                                    pj->primitives.gradients.v[2][2]));
+  dWj[1] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[0][0] +
+                           Wj[2] * pj->primitives.gradients.v[0][1] +
+                           Wj[3] * pj->primitives.gradients.v[0][2] +
+                           pj->primitives.gradients.P[0] / Wj[0]);
+  dWj[2] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[1][0] +
+                           Wj[2] * pj->primitives.gradients.v[1][1] +
+                           Wj[3] * pj->primitives.gradients.v[1][2] +
+                           pj->primitives.gradients.P[1] / Wj[0]);
+  dWj[3] -= 0.5 * mindt * (Wj[1] * pj->primitives.gradients.v[2][0] +
+                           Wj[2] * pj->primitives.gradients.v[2][1] +
+                           Wj[3] * pj->primitives.gradients.v[2][2] +
+                           pj->primitives.gradients.P[2] / Wj[0]);
+  dWj[4] -= 0.5 * mindt *
+            (Wj[1] * pj->primitives.gradients.P[0] +
+             Wj[2] * pj->primitives.gradients.P[1] +
+             Wj[3] * pj->primitives.gradients.P[2] +
+             const_hydro_gamma * Wj[4] * (pj->primitives.gradients.v[0][0] +
+                                          pj->primitives.gradients.v[1][1] +
+                                          pj->primitives.gradients.v[2][2]));
+
+  //    printf("WL: %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]);
+  //    printf("WR: %g %g %g %g %g\n", Wj[0], Wj[1], Wj[2], Wj[3], Wj[4]);
+
+  //    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
+  //    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
+
+  Wi[0] += dWi[0];
+  Wi[1] += dWi[1];
+  Wi[2] += dWi[2];
+  Wi[3] += dWi[3];
+  Wi[4] += dWi[4];
+
+  Wj[0] += dWj[0];
+  Wj[1] += dWj[1];
+  Wj[2] += dWj[2];
+  Wj[3] += dWj[3];
+  Wj[4] += dWj[4];
+#endif
+
+  /* apply slope limiter interface by interface */
+  /* ... to be done ... */
+
+  /* we don't need to rotate, we can use the unit vector in the Riemann problem
+   * itself (see GIZMO) */
+
+  if (Wi[0] < 0.0f || Wj[0] < 0.0f || Wi[4] < 0.0f || Wj[4] < 0.0f) {
+    printf("mindt: %g\n", mindt);
+    printf("WL: %g %g %g %g %g\n", pi->primitives.rho, pi->primitives.v[0],
+           pi->primitives.v[1], pi->primitives.v[2], pi->primitives.P);
+    printf("dWL: %g %g %g %g %g\n", dWi[0], dWi[1], dWi[2], dWi[3], dWi[4]);
+    printf("gradWL[0]: %g %g %g\n", pi->primitives.gradients.rho[0],
+           pi->primitives.gradients.rho[1], pi->primitives.gradients.rho[2]);
+    printf("gradWL[1]: %g %g %g\n", pi->primitives.gradients.v[0][0],
+           pi->primitives.gradients.v[0][1], pi->primitives.gradients.v[0][2]);
+    printf("gradWL[2]: %g %g %g\n", pi->primitives.gradients.v[1][0],
+           pi->primitives.gradients.v[1][1], pi->primitives.gradients.v[1][2]);
+    printf("gradWL[3]: %g %g %g\n", pi->primitives.gradients.v[2][0],
+           pi->primitives.gradients.v[2][1], pi->primitives.gradients.v[2][2]);
+    printf("gradWL[4]: %g %g %g\n", pi->primitives.gradients.P[0],
+           pi->primitives.gradients.P[1], pi->primitives.gradients.P[2]);
+    printf("WL': %g %g %g %g %g\n", Wi[0], Wi[1], Wi[2], Wi[3], Wi[4]);
+    printf("WR: %g %g %g %g %g\n", pj->primitives.rho, pj->primitives.v[0],
+           pj->primitives.v[1], pj->primitives.v[2], pj->primitives.P);
+    printf("dWR: %g %g %g %g %g\n", dWj[0], dWj[1], dWj[2], dWj[3], dWj[4]);
+    printf("gradWR[0]: %g %g %g\n", pj->primitives.gradients.rho[0],
+           pj->primitives.gradients.rho[1], pj->primitives.gradients.rho[2]);
+    printf("gradWR[1]: %g %g %g\n", pj->primitives.gradients.v[0][0],
+           pj->primitives.gradients.v[0][1], pj->primitives.gradients.v[0][2]);
+    printf("gradWR[2]: %g %g %g\n", pj->primitives.gradients.v[1][0],
+           pj->primitives.gradients.v[1][1], pj->primitives.gradients.v[1][2]);
+    printf("gradWR[3]: %g %g %g\n", pj->primitives.gradients.v[2][0],
+           pj->primitives.gradients.v[2][1], pj->primitives.gradients.v[2][2]);
+    printf("gradWR[4]: %g %g %g\n", pj->primitives.gradients.P[0],
+           pj->primitives.gradients.P[1], pj->primitives.gradients.P[2]);
+    printf("WR': %g %g %g %g %g\n", Wj[0], Wj[1], Wj[2], Wj[3], Wj[4]);
+    error("Negative density or pressure!\n");
+  }
+
+  GFLOAT totflux[5];
+  riemann_solve_for_flux(Wi, Wj, n_unit, vij, totflux);
+
+  /* Update conserved variables */
+  /* eqn. (16) */
+  pi->conserved.mass -= mindt * Anorm * totflux[0];
+  pi->conserved.momentum[0] -= mindt * Anorm * totflux[1];
+  pi->conserved.momentum[1] -= mindt * Anorm * totflux[2];
+  pi->conserved.momentum[2] -= mindt * Anorm * totflux[3];
+  pi->conserved.energy -= mindt * Anorm * totflux[4];
+
+#ifdef THERMAL_ENERGY
+  float ekin = 0.5 * (pi->primitives.v[0] * pi->primitives.v[0] +
+                      pi->primitives.v[1] * pi->primitives.v[1] +
+                      pi->primitives.v[2] * pi->primitives.v[2]);
+  pi->conserved.energy += mindt * Anorm * totflux[1] * pi->primitives.v[0];
+  pi->conserved.energy += mindt * Anorm * totflux[2] * pi->primitives.v[1];
+  pi->conserved.energy += mindt * Anorm * totflux[3] * pi->primitives.v[2];
+  pi->conserved.energy -= mindt * Anorm * totflux[0] * ekin;
+#endif
+
+  /* the non symmetric version is never called when using mindt, whether this
+   * piece of code
+   * should always be executed or only in the symmetric case is currently
+   * unclear */
+  if (mode == 1) {
+    pj->conserved.mass += mindt * Anorm * totflux[0];
+    pj->conserved.momentum[0] += mindt * Anorm * totflux[1];
+    pj->conserved.momentum[1] += mindt * Anorm * totflux[2];
+    pj->conserved.momentum[2] += mindt * Anorm * totflux[3];
+    pj->conserved.energy += mindt * Anorm * totflux[4];
+
+#ifdef THERMAL_ENERGY
+    ekin = 0.5 * (pj->primitives.v[0] * pj->primitives.v[0] +
+                  pj->primitives.v[1] * pj->primitives.v[1] +
+                  pj->primitives.v[2] * pj->primitives.v[2]);
+    pj->conserved.energy -= mindt * Anorm * totflux[1] * pj->primitives.v[0];
+    pj->conserved.energy -= mindt * Anorm * totflux[2] * pj->primitives.v[1];
+    pj->conserved.energy -= mindt * Anorm * totflux[3] * pj->primitives.v[2];
+    pj->conserved.energy += mindt * Anorm * totflux[0] * ekin;
+#endif
+  }
+}
+
+/* this corresponds to task_subtype_fluxes */
+__attribute__((always_inline)) INLINE static void runner_iact_hydro_loop3(
+    float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) {
+
+  runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 1);
+}
+
+/* this corresponds to task_subtype_fluxes */
+__attribute__((always_inline))
+    INLINE static void runner_iact_nonsym_hydro_loop3(float r2, float *dx,
+                                                      float hi, float hj,
+                                                      struct part *pi,
+                                                      struct part *pj) {
+
+  runner_iact_fluxes_common(r2, dx, hi, hj, pi, pj, 0);
+}
diff --git a/src/hydro/Gizmo/hydro_io.h b/src/hydro/Gizmo/hydro_io.h
new file mode 100644
index 0000000000000000000000000000000000000000..3c51653d994bd9f01864bcc24c6886eba25d1d05
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_io.h
@@ -0,0 +1,120 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @brief Reads the different particles to the HDF5 file
+ *
+ * @param h_grp The HDF5 group in which to read the arrays.
+ * @param N The number of particles on that MPI rank.
+ * @param N_total The total number of particles (only used in MPI mode)
+ * @param offset The offset of the particles for this MPI rank (only used in MPI
+ *mode)
+ * @param parts The particle array
+ *
+ */
+__attribute__((always_inline)) INLINE static void hydro_read_particles(
+    hid_t h_grp, int N, long long N_total, long long offset,
+    struct part* parts) {
+
+  /* Read arrays */
+  readArray(h_grp, "Coordinates", DOUBLE, N, 3, parts, N_total, offset, x,
+            COMPULSORY);
+  readArray(h_grp, "Velocities", FLOAT, N, 3, parts, N_total, offset, v,
+            COMPULSORY);
+  readArray(h_grp, "Masses", FLOAT, N, 1, parts, N_total, offset,
+            conserved.mass, COMPULSORY);
+  readArray(h_grp, "SmoothingLength", FLOAT, N, 1, parts, N_total, offset, h,
+            COMPULSORY);
+  readArray(h_grp, "InternalEnergy", FLOAT, N, 1, parts, N_total, offset,
+            primitives.P, COMPULSORY);
+  readArray(h_grp, "ParticleIDs", ULONGLONG, N, 1, parts, N_total, offset, id,
+            COMPULSORY);
+  readArray(h_grp, "Acceleration", FLOAT, N, 3, parts, N_total, offset, a_hydro,
+            OPTIONAL);
+  readArray(h_grp, "Density", FLOAT, N, 1, parts, N_total, offset,
+            primitives.rho, OPTIONAL);
+}
+
+/**
+ * @brief Writes the different particles to the HDF5 file
+ *
+ * @param h_grp The HDF5 group in which to write the arrays.
+ * @param fileName The name of the file (unsued in MPI mode).
+ * @param xmfFile The XMF file to write to (unused in MPI mode).
+ * @param N The number of particles on that MPI rank.
+ * @param N_total The total number of particles (only used in MPI mode)
+ * @param mpi_rank The MPI rank of this node (only used in MPI mode)
+ * @param offset The offset of the particles for this MPI rank (only used in MPI
+ *mode)
+ * @param parts The particle array
+ * @param us The unit system to use
+ *
+ */
+__attribute__((always_inline)) INLINE static void hydro_write_particles(
+    hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total,
+    int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) {
+
+  /* Write arrays */
+  writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts,
+             N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts,
+             N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED);
+  writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total,
+             mpi_rank, offset, conserved.mass, us, UNIT_CONV_MASS);
+  writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts,
+             N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH);
+  writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts,
+             N_total, mpi_rank, offset, primitives.P, us,
+             UNIT_CONV_ENTROPY_PER_UNIT_MASS);
+  writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts,
+             N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS);
+  writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts,
+             N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION);
+  writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total,
+             mpi_rank, offset, primitives.rho, us, UNIT_CONV_DENSITY);
+}
+
+/**
+ * @brief Writes the current model of SPH to the file
+ * @param h_grpsph The HDF5 group in which to write
+ */
+void writeSPHflavour(hid_t h_grpsph) {
+
+  /* Kernel function description */
+  writeAttribute_s(h_grpsph, "Kernel", kernel_name);
+  writeAttribute_f(h_grpsph, "Kernel eta", const_eta_kernel);
+  writeAttribute_f(h_grpsph, "Weighted N_ngb", kernel_nwneigh);
+  writeAttribute_f(h_grpsph, "Delta N_ngb", const_delta_nwneigh);
+  writeAttribute_f(h_grpsph, "Hydro gamma", const_hydro_gamma);
+
+  /* Viscosity and thermal conduction */
+  writeAttribute_s(h_grpsph, "Thermal Conductivity Model",
+                   "(No treatment) Legacy Gadget-2 as in Springel (2005)");
+  writeAttribute_s(h_grpsph, "Viscosity Model",
+                   "Legacy Gadget-2 as in Springel (2005)");
+  writeAttribute_f(h_grpsph, "Viscosity alpha", const_viscosity_alpha);
+  writeAttribute_f(h_grpsph, "Viscosity beta", 3.f);
+
+  /* Time integration properties */
+  writeAttribute_f(h_grpsph, "CFL parameter", const_cfl);
+  writeAttribute_f(h_grpsph, "Maximal ln(Delta h) change over dt",
+                   const_ln_max_h_change);
+  writeAttribute_f(h_grpsph, "Maximal Delta h change over dt",
+                   exp(const_ln_max_h_change));
+}
diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h
new file mode 100644
index 0000000000000000000000000000000000000000..9e5f32f758248d1d1616f4556c81fc8e0b52e83b
--- /dev/null
+++ b/src/hydro/Gizmo/hydro_part.h
@@ -0,0 +1,162 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *                    Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/* Some standard headers. */
+#include <stdlib.h>
+
+#define GFLOAT float
+
+/* Extra particle data not needed during the computation. */
+struct xpart {
+
+  /* Old position, at last tree rebuild. */
+  double x_old[3];
+
+  /* Velocity at the last full step. */
+  float v_full[3];
+
+  /* Entropy at the half-step. */
+  float u_hdt;
+
+  /* Old density. */
+  float omega;
+
+} __attribute__((aligned(xpart_align)));
+
+/* Data of a single particle. */
+struct part {
+
+  /* Particle position. */
+  double x[3];
+
+  /* Particle velocity. */
+  float v[3];
+
+  /* Particle acceleration. */
+  float a_hydro[3];
+
+  float mass;  // MATTHIEU
+  float h_dt;
+  float rho;
+  float rho_dh;
+
+  /* Particle cutoff radius. */
+  float h;
+
+  /* Particle time of beginning of time-step. */
+  int ti_begin;
+
+  /* Particle time of end of time-step. */
+  int ti_end;
+
+  /* The primitive hydrodynamical variables */
+  struct {
+
+    /* fluid velocity */
+    GFLOAT v[3];
+
+    /* density */
+    GFLOAT rho;
+
+    /* pressure */
+    GFLOAT P;
+
+    struct {
+
+      GFLOAT rho[3];
+
+      GFLOAT v[3][3];
+
+      GFLOAT P[3];
+
+    } gradients;
+
+    struct {
+
+      /* extreme values among the neighbours */
+      GFLOAT rho[2];
+
+      GFLOAT v[3][2];
+
+      GFLOAT P[2];
+
+      /* maximal distance to all neighbouring faces */
+      float maxr;
+
+    } limiter;
+
+  } primitives;
+
+  /* The conserved hydrodynamical variables */
+  struct {
+
+    /* fluid momentum */
+    GFLOAT momentum[3];
+
+    /* fluid mass */
+    GFLOAT mass;
+
+    /* fluid energy */
+    GFLOAT energy;
+
+  } conserved;
+
+  /* Geometrical quantities used for hydro */
+  struct {
+
+    /* volume of the particle */
+    float volume;
+
+    /* gradient matrix */
+    float matrix_E[3][3];
+
+  } geometry;
+
+  struct {
+
+    float vmax;
+
+  } timestepvars;
+
+  /* Quantities used during the density loop */
+  struct {
+
+    /* Particle velocity divergence. */
+    float div_v;
+
+    /* Derivative of particle number density. */
+    float wcount_dh;
+
+    /* Particle velocity curl. */
+    float curl_v[3];
+
+    /* Particle number density. */
+    float wcount;
+
+  } density;
+
+  /* Particle ID. */
+  unsigned long long id;
+
+  /* Associated gravitas. */
+  struct gpart *gpart;
+
+} __attribute__((aligned(part_align)));
diff --git a/src/riemann.h b/src/riemann.h
new file mode 100644
index 0000000000000000000000000000000000000000..ad37490d7249bafe776b58a609064cbd0bc23abe
--- /dev/null
+++ b/src/riemann.h
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_RIEMANN_H
+#define SWIFT_RIEMANN_H
+
+/* gives us const_hydro_gamma and tells us which floating point type to use */
+#include "const.h"
+#include "math.h"
+#include "stdio.h"
+#include "float.h"
+#include "stdlib.h"
+#include "error.h"
+
+#define HLLC_SOLVER
+
+#ifdef EXACT_SOLVER
+#include "riemann/riemann_exact.h"
+#endif
+
+#ifdef TRRS_SOLVER
+#include "riemann/riemann_trrs.h"
+#endif
+
+#ifdef HLLC_SOLVER
+#include "riemann/riemann_hllc.h"
+#endif
+
+#endif /* SWIFT_RIEMANN_H */
diff --git a/src/riemann/riemann_exact.h b/src/riemann/riemann_exact.h
new file mode 100644
index 0000000000000000000000000000000000000000..861dad9729794efb302638792fef6e3df43c700a
--- /dev/null
+++ b/src/riemann/riemann_exact.h
@@ -0,0 +1,710 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/******************************************************************************
+ * The Riemann solver in this file was written by Bert Vandenbroucke as part of
+ * the moving mesh code Shadowfax and adapted for use with SWIFT. It consists
+ * of an exact Riemann solver as described in
+ *  Toro, Eleuterio F., Riemann Solvers and Numerical Methods for Fluid
+ *  Dynamics, Springer (2009, 3rd edition)
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RIEMANN_EXACT_H
+#define SWIFT_RIEMANN_EXACT_H
+
+/* frequently used combinations of const_hydro_gamma */
+#define const_riemann_gp1d2g \
+  (0.5f * (const_hydro_gamma + 1.0f) / const_hydro_gamma)
+#define const_riemann_gm1d2g \
+  (0.5f * (const_hydro_gamma - 1.0f) / const_hydro_gamma)
+#define const_riemann_gm1dgp1 \
+  ((const_hydro_gamma - 1.0f) / (const_hydro_gamma + 1.0f))
+#define const_riemann_tdgp1 (2.0f / (const_hydro_gamma + 1.0f))
+#define const_riemann_tdgm1 (2.0f / (const_hydro_gamma - 1.0f))
+#define const_riemann_gm1d2 (0.5f * (const_hydro_gamma - 1.0f))
+#define const_riemann_tgdgm1 \
+  (2.0f * const_hydro_gamma / (const_hydro_gamma - 1.0f))
+#define const_riemann_ginv (1.0f / const_hydro_gamma)
+
+/**
+ * @brief Functions (4.6) and (4.7) in Toro.
+ *
+ * @param p The current guess for the pressure
+ * @param W The left or right state vector
+ * @param a The left or right sound speed
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_fb(GFLOAT p,
+                                                               GFLOAT* W,
+                                                               GFLOAT a) {
+
+  GFLOAT fval = 0.;
+  GFLOAT A, B;
+  if (p > W[4]) {
+    A = const_riemann_tdgp1 / W[0];
+    B = const_riemann_gm1dgp1 * W[4];
+    fval = (p - W[4]) * sqrtf(A / (p + B));
+  } else {
+    fval =
+        const_riemann_tdgm1 * a * (powf(p / W[4], const_riemann_gm1d2g) - 1.0f);
+  }
+  return fval;
+}
+
+/**
+ * @brief Function (4.5) in Toro
+ *
+ * @param p The current guess for the pressure
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ */
+__attribute__((always_inline))
+    INLINE static GFLOAT riemann_f(GFLOAT p, GFLOAT* WL, GFLOAT* WR, GFLOAT vL,
+                                   GFLOAT vR, GFLOAT aL, GFLOAT aR) {
+
+  return riemann_fb(p, WL, aL) + riemann_fb(p, WR, aR) + (vR - vL);
+}
+
+/**
+ * @brief Function (4.37) in Toro
+ *
+ * @param p The current guess for the pressure
+ * @param W The left or right state vector
+ * @param a The left or right sound speed
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_fprimeb(GFLOAT p,
+                                                                    GFLOAT* W,
+                                                                    GFLOAT a) {
+
+  GFLOAT fval = 0.;
+  GFLOAT A, B;
+  if (p > W[4]) {
+    A = const_riemann_tdgp1 / W[0];
+    B = const_riemann_gm1dgp1 * W[4];
+    fval = (1.0f - 0.5f * (p - W[4]) / (B + p)) * sqrtf(A / (p + B));
+  } else {
+    fval = 1.0f / (W[0] * a) * powf(p / W[4], -const_riemann_gp1d2g);
+  }
+  return fval;
+}
+
+/**
+ * @brief The derivative of riemann_f w.r.t. p
+ *
+ * @param p The current guess for the pressure
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_fprime(
+    GFLOAT p, GFLOAT* WL, GFLOAT* WR, GFLOAT aL, GFLOAT aR) {
+
+  return riemann_fprimeb(p, WL, aL) + riemann_fprimeb(p, WR, aR);
+}
+
+/**
+ * @brief Bottom function of (4.48) in Toro
+ *
+ * @param p The current guess for the pressure
+ * @param W The left or right state vector
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_gb(GFLOAT p,
+                                                               GFLOAT* W) {
+
+  GFLOAT A, B;
+  A = const_riemann_tdgp1 / W[0];
+  B = const_riemann_gm1dgp1 * W[4];
+  return sqrtf(A / (p + B));
+}
+
+/**
+ * @brief Get a good first guess for the pressure in the iterative scheme
+ *
+ * This function is based on (4.47) and (4.48) in Toro and on the
+ * FORTRAN code provided in Toro p.156-157
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_guess_p(
+    GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL, GFLOAT aR) {
+
+  GFLOAT pguess, pmin, pmax, qmax;
+  GFLOAT ppv;
+
+  pmin = fminf(WL[4], WR[4]);
+  pmax = fmaxf(WL[4], WR[4]);
+  qmax = pmax / pmin;
+  ppv =
+      0.5f * (WL[4] + WR[4]) - 0.125f * (vR - vL) * (WL[0] + WR[0]) * (aL + aR);
+  ppv = fmaxf(1.e-8f, ppv);
+  if (qmax <= 2.0f && pmin <= ppv && ppv <= pmax) {
+    pguess = ppv;
+  } else {
+    if (ppv < pmin) {
+      /* two rarefactions */
+      pguess = powf((aL + aR - const_riemann_gm1d2 * (vR - vL)) /
+                        (aL / powf(WL[4], const_riemann_gm1d2g) +
+                         aR / powf(WR[4], const_riemann_gm1d2g)),
+                    const_riemann_tgdgm1);
+    } else {
+      /* two shocks */
+      pguess = (riemann_gb(ppv, WL) * WL[4] + riemann_gb(ppv, WR) * WR[4] - vR +
+                vL) /
+               (riemann_gb(ppv, WL) + riemann_gb(ppv, WR));
+    }
+  }
+  /* Toro: "Not that approximate solutions may predict, incorrectly, a negative
+     value for pressure (...).
+     Thus in order to avoid negative guess values we introduce the small
+     positive constant _tolerance" */
+  pguess = fmaxf(1.e-8f, pguess);
+  return pguess;
+}
+
+/**
+ * @brief Find the zeropoint of riemann_f(p) using Brent's method
+ *
+ * @param lower_limit Lower limit for the method (riemann_f(lower_limit) < 0)
+ * @param upper_limit Upper limit for the method (riemann_f(upper_limit) > 0)
+ * @param error_tol Tolerance used to decide if the solution is converged
+ * @param WL Left state vector
+ * @param WR Right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ */
+__attribute__((always_inline)) INLINE static GFLOAT riemann_solve_brent(
+    GFLOAT lower_limit, GFLOAT upper_limit, GFLOAT lowf, GFLOAT upf,
+    GFLOAT error_tol, GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL,
+    GFLOAT aR) {
+
+  GFLOAT a, b, c, d, s;
+  GFLOAT fa, fb, fc, fs;
+  GFLOAT tmp, tmp2;
+  int mflag;
+  int i;
+
+  a = lower_limit;
+  b = upper_limit;
+  c = 0.0f;
+  d = FLT_MAX;
+
+  fa = lowf;
+  fb = upf;
+
+  fc = 0.0f;
+  s = 0.0f;
+  fs = 0.0f;
+
+  /* if f(a) f(b) >= 0 then error-exit */
+  if (fa * fb >= 0.0f) {
+    error(
+        "Brent's method called with equal sign function values!\n"
+        "f(%g) = %g, f(%g) = %g\n",
+        a, fa, b, fb);
+    /* return NaN */
+    return 0.0f / 0.0f;
+  }
+
+  /* if |f(a)| < |f(b)| then swap (a,b) */
+  if (fabs(fa) < fabs(fb)) {
+    tmp = a;
+    a = b;
+    b = tmp;
+    tmp = fa;
+    fa = fb;
+    fb = tmp;
+  }
+
+  c = a;
+  fc = fa;
+  mflag = 1;
+  i = 0;
+
+  while (!(fb == 0.0f) && (fabs(a - b) > error_tol * 0.5f * (a + b))) {
+    if ((fa != fc) && (fb != fc)) /* Inverse quadratic interpolation */
+      s = a * fb * fc / (fa - fb) / (fa - fc) +
+          b * fa * fc / (fb - fa) / (fb - fc) +
+          c * fa * fb / (fc - fa) / (fc - fb);
+    else
+      /* Secant Rule */
+      s = b - fb * (b - a) / (fb - fa);
+
+    tmp2 = 0.25f * (3.0f * a + b);
+    if (!(((s > tmp2) && (s < b)) || ((s < tmp2) && (s > b))) ||
+        (mflag && (fabs(s - b) >= (0.5f * fabs(b - c)))) ||
+        (!mflag && (fabs(s - b) >= (0.5f * fabs(c - d)))) ||
+        (mflag && (fabs(b - c) < error_tol)) ||
+        (!mflag && (fabs(c - d) < error_tol))) {
+      s = 0.5f * (a + b);
+      mflag = 1;
+    } else {
+      mflag = 0;
+    }
+    fs = riemann_f(s, WL, WR, vL, vR, aL, aR);
+    d = c;
+    c = b;
+    fc = fb;
+    if (fa * fs < 0.) {
+      b = s;
+      fb = fs;
+    } else {
+      a = s;
+      fa = fs;
+    }
+
+    /* if |f(a)| < |f(b)| then swap (a,b) */
+    if (fabs(fa) < fabs(fb)) {
+      tmp = a;
+      a = b;
+      b = tmp;
+      tmp = fa;
+      fa = fb;
+      fb = tmp;
+    }
+    i++;
+  }
+  return b;
+}
+
+/**
+ * @brief Vacuum Riemann solver, based on section 4.6 in Toro
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param vL The left velocity along the interface normal
+ * @param vR The right velocity along the interface normal
+ * @param aL The left sound speed
+ * @param aR The right sound speed
+ * @param Whalf Empty state vector to store the solution in
+ * @param n_unit Normal vector of the interface
+ */
+__attribute__((always_inline)) INLINE static void riemann_solve_vacuum(
+    GFLOAT* WL, GFLOAT* WR, GFLOAT vL, GFLOAT vR, GFLOAT aL, GFLOAT aR,
+    GFLOAT* Whalf, float* n_unit) {
+
+  GFLOAT SL, SR;
+  GFLOAT vhalf;
+
+  if (!WR[0] && !WL[0]) {
+    /* if both states are vacuum, the solution is also vacuum */
+    Whalf[0] = 0.0f;
+    Whalf[1] = 0.0f;
+    Whalf[2] = 0.0f;
+    Whalf[3] = 0.0f;
+    Whalf[4] = 0.0f;
+    return;
+  }
+  if (!WR[0]) {
+    Whalf[1] = WL[1];
+    Whalf[2] = WL[2];
+    Whalf[3] = WL[3];
+    /* vacuum right state */
+    if (vL < aL) {
+      SL = vL + const_riemann_tdgm1 * aL;
+      if (SL > 0.0f) {
+        Whalf[0] =
+            WL[0] * powf(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
+                         const_riemann_tdgm1);
+        vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
+        Whalf[4] =
+            WL[4] * powf(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
+                         const_riemann_tgdgm1);
+      } else {
+        Whalf[0] = 0.0f;
+        Whalf[1] = 0.0f;
+        Whalf[2] = 0.0f;
+        Whalf[3] = 0.0f;
+        Whalf[4] = 0.0f;
+        return;
+      }
+    } else {
+      Whalf[0] = WL[0];
+      vhalf = 0.0f;
+      Whalf[4] = WL[4];
+    }
+  } else {
+    if (!WL[0]) {
+      Whalf[1] = WR[1];
+      Whalf[2] = WR[2];
+      Whalf[3] = WR[3];
+      /* vacuum left state */
+      if (-vR < aR) {
+        SR = vR - const_riemann_tdgm1 * aR;
+        if (SR >= 0.0f) {
+          Whalf[0] = 0.0f;
+          Whalf[1] = 0.0f;
+          Whalf[2] = 0.0f;
+          Whalf[3] = 0.0f;
+          Whalf[4] = 0.0f;
+          return;
+        } else {
+          Whalf[0] = WR[0] *
+                     powf(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
+                          const_riemann_tdgm1);
+          vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
+          Whalf[4] = WR[4] *
+                     powf(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
+                          const_riemann_tgdgm1);
+        }
+      } else {
+        Whalf[0] = WR[0];
+        vhalf = 0.0f;
+        Whalf[4] = WR[4];
+      }
+    } else {
+      /* vacuum generation */
+      SR = vR - const_riemann_tdgm1 * aR;
+      SL = vL + const_riemann_tdgm1 * aL;
+      if (SR > 0.0f && SL < 0.0f) {
+        Whalf[0] = 0.0f;
+        Whalf[1] = 0.0f;
+        Whalf[2] = 0.0f;
+        Whalf[3] = 0.0f;
+        Whalf[4] = 0.0f;
+        return;
+      } else {
+        if (SL >= 0.0f) {
+          Whalf[1] = WL[1];
+          Whalf[2] = WL[2];
+          Whalf[3] = WL[3];
+          if (aL > vL) {
+            Whalf[0] = WL[0] * powf(const_riemann_tdgp1 +
+                                        const_riemann_gm1dgp1 / aL * vL,
+                                    const_riemann_tdgm1);
+            vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
+            Whalf[4] = WL[4] * powf(const_riemann_tdgp1 +
+                                        const_riemann_gm1dgp1 / aL * vL,
+                                    const_riemann_tgdgm1);
+          } else {
+            Whalf[0] = WL[0];
+            vhalf = 0.0f;
+            Whalf[4] = WL[4];
+          }
+        } else {
+          Whalf[1] = WR[1];
+          Whalf[2] = WR[2];
+          Whalf[3] = WR[3];
+          if (-vR < aR) {
+            Whalf[0] = WR[0] * powf(const_riemann_tdgp1 -
+                                        const_riemann_gm1dgp1 / aR * vR,
+                                    const_riemann_tdgm1);
+            vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
+            Whalf[4] = WR[4] * powf(const_riemann_tdgp1 -
+                                        const_riemann_gm1dgp1 / aR * vR,
+                                    const_riemann_tgdgm1);
+          } else {
+            Whalf[0] = WR[0];
+            vhalf = 0.0f;
+            Whalf[4] = WR[4];
+          }
+        }
+      }
+    }
+  }
+
+  /* Add the velocity solution along the interface normal to the velocities */
+  Whalf[1] += vhalf * n_unit[0];
+  Whalf[2] += vhalf * n_unit[1];
+  Whalf[3] += vhalf * n_unit[2];
+}
+
+/* Solve the Riemann problem between the states WL and WR and store the result
+ * in Whalf
+ * The Riemann problem is solved in the x-direction; the velocities in the y-
+ * and z-direction
+ * are simply advected.
+ */
+/**
+ * @brief Solve the Riemann problem between the given left and right state and
+ * along the given interface normal
+ *
+ * Based on chapter 4 in Toro
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param Whalf Empty state vector in which the result will be stored
+ * @param n_unit Normal vector of the interface
+ */
+__attribute__((always_inline)) INLINE static void riemann_solver_solve(
+    GFLOAT* WL, GFLOAT* WR, GFLOAT* Whalf, float* n_unit) {
+
+  /* velocity of the left and right state in a frame aligned with n_unit */
+  GFLOAT vL, vR, vhalf;
+  /* sound speeds */
+  GFLOAT aL, aR;
+  /* variables used for finding pstar */
+  GFLOAT p, pguess, fp, fpguess;
+  /* variables used for sampling the solution */
+  GFLOAT u;
+  GFLOAT pdpR, SR;
+  GFLOAT SHR, STR;
+  GFLOAT pdpL, SL;
+  GFLOAT SHL, STL;
+  int errorFlag = 0;
+
+  /* sanity checks */
+  if (WL[0] != WL[0] || WL[4] != WL[4]) {
+    printf("NaN WL!\n");
+    errorFlag = 1;
+  }
+  if (WR[0] != WR[0] || WR[4] != WR[4]) {
+    printf("NaN WR!\n");
+    errorFlag = 1;
+  }
+  if (WL[0] < 0.0f || WL[4] < 0.0f) {
+    printf("Negative WL!\n");
+    errorFlag = 1;
+  }
+  if (WR[0] < 0.0f || WR[4] < 0.0f) {
+    printf("Negative WR!\n");
+    errorFlag = 1;
+  }
+  if (errorFlag) {
+    printf("WL: %g %g %g %g %g\n", WL[0], WL[1], WL[2], WL[3], WL[4]);
+    printf("WR: %g %g %g %g %g\n", WR[0], WR[1], WR[2], WR[3], WR[4]);
+    error("Riemman solver input error!\n");
+  }
+
+  /* calculate velocities in interface frame */
+  vL = WL[1] * n_unit[0] + WL[2] * n_unit[1] + WL[3] * n_unit[2];
+  vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2];
+
+  /* calculate sound speeds */
+  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+
+  if (!WL[0] || !WR[0]) {
+    /* vacuum: we need a vacuum riemann solver */
+    riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
+    return;
+  }
+
+  /* check vacuum generation condition */
+  if (2.0f * aL / (const_hydro_gamma - 1.0f) +
+          2.0f * aR / (const_hydro_gamma - 1.0f) <
+      fabs(vL - vR)) {
+    /* vacuum generation: need a vacuum riemann solver */
+    riemann_solve_vacuum(WL, WR, vL, vR, aL, aR, Whalf, n_unit);
+    return;
+  } else {
+    /* values are ok: let's find pstar (riemann_f(pstar) = 0)! */
+    /* We normally use a Newton-Raphson iteration to find the zeropoint
+       of riemann_f(p), but if pstar is close to 0, we risk negative p values.
+       Since riemann_f(p) is undefined for negative pressures, we don't
+       want this to happen.
+       We therefore use Brent's method if riemann_f(0) is larger than some
+       value. -5 makes the iteration fail safe while almost never invoking
+       the expensive Brent solver. */
+    p = 0.;
+    /* obtain a first guess for p */
+    pguess = riemann_guess_p(WL, WR, vL, vR, aL, aR);
+    fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
+    fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
+    /* ok, pstar is close to 0, better use Brent's method... */
+    /* we use Newton-Raphson until we find a suitable interval */
+    if (fp * fpguess >= 0.0f) {
+      /* Newton-Raphson until convergence or until suitable interval is found
+         to use Brent's method */
+      unsigned int counter = 0;
+      while (fabs(p - pguess) > 1.e-6f * 0.5f * (p + pguess) &&
+             fpguess < 0.0f) {
+        p = pguess;
+        pguess = pguess - fpguess / riemann_fprime(pguess, WL, WR, aL, aR);
+        fpguess = riemann_f(pguess, WL, WR, vL, vR, aL, aR);
+        counter++;
+        if (counter > 1000) {
+          error("Stuck in Newton-Raphson!\n");
+        }
+      }
+    }
+    /* As soon as there is a suitable interval: use Brent's method */
+    if (1.e6 * fabs(p - pguess) > 0.5f * (p + pguess) && fpguess > 0.0f) {
+      p = 0.0f;
+      fp = riemann_f(p, WL, WR, vL, vR, aL, aR);
+      /* use Brent's method to find the zeropoint */
+      p = riemann_solve_brent(p, pguess, fp, fpguess, 1.e-6, WL, WR, vL, vR, aL,
+                              aR);
+    } else {
+      p = pguess;
+    }
+
+    /* calculate the velocity in the intermediate state */
+    u = 0.5f * (vL + vR) +
+        0.5f * (riemann_fb(p, WR, aR) - riemann_fb(p, WL, aL));
+
+    /* sample the solution */
+    /* This corresponds to the flow chart in Fig. 4.14 in Toro */
+    if (u < 0.0f) {
+      /* advect velocity components */
+      Whalf[1] = WR[1];
+      Whalf[2] = WR[2];
+      Whalf[3] = WR[3];
+      pdpR = p / WR[4];
+      if (p > WR[4]) {
+        /* shockwave */
+        SR =
+            vR + aR * sqrtf(const_riemann_gp1d2g * pdpR + const_riemann_gm1d2g);
+        if (SR > 0.0f) {
+          Whalf[0] = WR[0] * (pdpR + const_riemann_gm1dgp1) /
+                     (const_riemann_gm1dgp1 * pdpR + 1.0f);
+          vhalf = u - vR;
+          Whalf[4] = p;
+        } else {
+          Whalf[0] = WR[0];
+          vhalf = 0.0f;
+          Whalf[4] = WR[4];
+        }
+      } else {
+        /* rarefaction wave */
+        SHR = vR + aR;
+        if (SHR > 0.0f) {
+          STR = u + aR * powf(pdpR, const_riemann_gm1d2g);
+          if (STR <= 0.0f) {
+            Whalf[0] = WR[0] * powf(const_riemann_tdgp1 -
+                                        const_riemann_gm1dgp1 / aR * vR,
+                                    const_riemann_tdgm1);
+            vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
+            Whalf[4] = WR[4] * powf(const_riemann_tdgp1 -
+                                        const_riemann_gm1dgp1 / aR * vR,
+                                    const_riemann_tgdgm1);
+          } else {
+            Whalf[0] = WR[0] * powf(pdpR, const_riemann_ginv);
+            vhalf = u - vR;
+            Whalf[4] = p;
+          }
+        } else {
+          Whalf[0] = WR[0];
+          vhalf = 0.0f;
+          Whalf[4] = WR[4];
+        }
+      }
+    } else {
+      Whalf[1] = WL[1];
+      Whalf[2] = WL[2];
+      Whalf[3] = WL[3];
+      pdpL = p / WL[4];
+      if (p > WL[4]) {
+        /* shockwave */
+        SL =
+            vL - aL * sqrtf(const_riemann_gp1d2g * pdpL + const_riemann_gm1d2g);
+        if (SL < 0.0f) {
+          Whalf[0] = WL[0] * (pdpL + const_riemann_gm1dgp1) /
+                     (const_riemann_gm1dgp1 * pdpL + 1.0f);
+          vhalf = u - vL;
+          Whalf[4] = p;
+        } else {
+          Whalf[0] = WL[0];
+          vhalf = 0.0f;
+          Whalf[4] = WL[4];
+        }
+      } else {
+        /* rarefaction wave */
+        SHL = vL - aL;
+        if (SHL < 0.0f) {
+          STL = u - aL * powf(pdpL, const_riemann_gm1d2g);
+          if (STL > 0.0f) {
+            Whalf[0] = WL[0] * powf(const_riemann_tdgp1 +
+                                        const_riemann_gm1dgp1 / aL * vL,
+                                    const_riemann_tdgm1);
+            vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
+            Whalf[4] = WL[4] * powf(const_riemann_tdgp1 +
+                                        const_riemann_gm1dgp1 / aL * vL,
+                                    const_riemann_tgdgm1);
+          } else {
+            Whalf[0] = WL[0] * powf(pdpL, const_riemann_ginv);
+            vhalf = u - vL;
+            Whalf[4] = p;
+          }
+        } else {
+          Whalf[0] = WL[0];
+          vhalf = 0.0f;
+          Whalf[4] = WL[4];
+        }
+      }
+    }
+  }
+
+  /* add the velocity solution along the interface normal to the velocities */
+  Whalf[1] += vhalf * n_unit[0];
+  Whalf[2] += vhalf * n_unit[1];
+  Whalf[3] += vhalf * n_unit[2];
+}
+
+__attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
+    GFLOAT* Wi, GFLOAT* Wj, float* n_unit, float* vij, GFLOAT* totflux) {
+
+  GFLOAT Whalf[5];
+  GFLOAT flux[5][3];
+  float vtot[3];
+  float rhoe;
+
+  riemann_solver_solve(Wi, Wj, Whalf, n_unit);
+
+  flux[0][0] = Whalf[0] * Whalf[1];
+  flux[0][1] = Whalf[0] * Whalf[2];
+  flux[0][2] = Whalf[0] * Whalf[3];
+
+  vtot[0] = Whalf[1] + vij[0];
+  vtot[1] = Whalf[2] + vij[1];
+  vtot[2] = Whalf[3] + vij[2];
+  flux[1][0] = Whalf[0] * vtot[0] * Whalf[1] + Whalf[4];
+  flux[1][1] = Whalf[0] * vtot[0] * Whalf[2];
+  flux[1][2] = Whalf[0] * vtot[0] * Whalf[3];
+  flux[2][0] = Whalf[0] * vtot[1] * Whalf[1];
+  flux[2][1] = Whalf[0] * vtot[1] * Whalf[2] + Whalf[4];
+  flux[2][2] = Whalf[0] * vtot[1] * Whalf[3];
+  flux[3][0] = Whalf[0] * vtot[2] * Whalf[1];
+  flux[3][1] = Whalf[0] * vtot[2] * Whalf[2];
+  flux[3][2] = Whalf[0] * vtot[2] * Whalf[3] + Whalf[4];
+
+  /* eqn. (15) */
+  /* F_P = \rho e ( \vec{v} - \vec{v_{ij}} ) + P \vec{v} */
+  /* \rho e = P / (\gamma-1) + 1/2 \rho \vec{v}^2 */
+  rhoe = Whalf[4] / (const_hydro_gamma - 1.0f) +
+         0.5f * Whalf[0] *
+             (vtot[0] * vtot[0] + vtot[1] * vtot[1] + vtot[2] * vtot[2]);
+  flux[4][0] = rhoe * Whalf[1] + Whalf[4] * vtot[0];
+  flux[4][1] = rhoe * Whalf[2] + Whalf[4] * vtot[1];
+  flux[4][2] = rhoe * Whalf[3] + Whalf[4] * vtot[2];
+
+  totflux[0] =
+      flux[0][0] * n_unit[0] + flux[0][1] * n_unit[1] + flux[0][2] * n_unit[2];
+  totflux[1] =
+      flux[1][0] * n_unit[0] + flux[1][1] * n_unit[1] + flux[1][2] * n_unit[2];
+  totflux[2] =
+      flux[2][0] * n_unit[0] + flux[2][1] * n_unit[1] + flux[2][2] * n_unit[2];
+  totflux[3] =
+      flux[3][0] * n_unit[0] + flux[3][1] * n_unit[1] + flux[3][2] * n_unit[2];
+  totflux[4] =
+      flux[4][0] * n_unit[0] + flux[4][1] * n_unit[1] + flux[4][2] * n_unit[2];
+}
+
+#endif /* SWIFT_RIEMANN_EXACT_H */
diff --git a/src/riemann/riemann_hllc.h b/src/riemann/riemann_hllc.h
new file mode 100644
index 0000000000000000000000000000000000000000..3fcc8b534ce65d4d364c400dc43e1992f39264b9
--- /dev/null
+++ b/src/riemann/riemann_hllc.h
@@ -0,0 +1,154 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RIEMANN_HLLC_H
+#define SWIFT_RIEMANN_HLLC_H
+
+__attribute__((always_inline)) INLINE static void riemann_solve_for_flux(
+    GFLOAT *WL, GFLOAT *WR, float *n, float *vij, GFLOAT *totflux) {
+
+  GFLOAT uL, uR, aL, aR;
+  GFLOAT rhobar, abar, pPVRS, pstar, qL, qR, SL, SR, Sstar;
+  GFLOAT v2, eL, eR;
+  GFLOAT UstarL[5], UstarR[5];
+
+  /* Handle pure vacuum */
+  if (!WL[0] && !WR[0]) {
+    totflux[0] = 0.;
+    totflux[1] = 0.;
+    totflux[2] = 0.;
+    totflux[3] = 0.;
+    totflux[4] = 0.;
+    return;
+  }
+
+  /* STEP 0: obtain velocity in interface frame */
+  uL = WL[1] * n[0] + WL[2] * n[1] + WL[3] * n[2];
+  uR = WR[1] * n[0] + WR[2] * n[1] + WR[3] * n[2];
+  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+
+  /* Handle vacuum: vacuum does not require iteration and is always exact */
+  if (!WL[0] || !WR[0]) {
+    error("Vacuum not yet supported");
+  }
+  if (2. * aL / (const_hydro_gamma - 1.) + 2. * aR / (const_hydro_gamma - 1.) <
+      fabs(uL - uR)) {
+    error("Vacuum not yet supported");
+  }
+
+  /* STEP 1: pressure estimate */
+  rhobar = 0.5 * (WL[0] + WR[0]);
+  abar = 0.5 * (aL + aR);
+  pPVRS = 0.5 * (WL[4] + WR[4]) - 0.5 * (uR - uL) * rhobar * abar;
+  pstar = fmaxf(0., pPVRS);
+
+  /* STEP 2: wave speed estimates
+     all these speeds are along the interface normal, since uL and uR are */
+  qL = 1.;
+  if (pstar > WL[4]) {
+    qL = sqrtf(1. + 0.5 * (const_hydro_gamma + 1.) / const_hydro_gamma *
+                        (pstar / WL[4] - 1.));
+  }
+  qR = 1.;
+  if (pstar > WR[4]) {
+    qR = sqrtf(1. + 0.5 * (const_hydro_gamma + 1.) / const_hydro_gamma *
+                        (pstar / WR[4] - 1.));
+  }
+  SL = uL - aL * qL;
+  SR = uR + aR * qR;
+  Sstar = (WR[4] - WL[4] + WL[0] * uL * (SL - uL) - WR[0] * uR * (SR - uR)) /
+          (WL[0] * (SL - uL) - WR[0] * (SR - uR));
+
+  /* STEP 3: HLLC flux in a frame moving with the interface velocity */
+  if (Sstar >= 0.) {
+    /* flux FL */
+    totflux[0] = WL[0] * uL;
+    /* these are the actual correct fluxes in the boosted lab frame
+       (not rotated to interface frame) */
+    totflux[1] = WL[0] * WL[1] * uL + WL[4] * n[0];
+    totflux[2] = WL[0] * WL[2] * uL + WL[4] * n[1];
+    totflux[3] = WL[0] * WL[2] * uL + WL[4] * n[2];
+    v2 = WL[1] * WL[1] + WL[2] * WL[2] + WL[3] * WL[3];
+    eL = WL[4] / (const_hydro_gamma - 1.) / WL[0] + 0.5 * v2;
+    totflux[4] = WL[0] * eL * uL + WL[4] * uL;
+    if (SL < 0.) {
+      /* add flux FstarL */
+      UstarL[0] = 1.;
+      /* we need UstarL in the lab frame:
+         subtract the velocity in the interface frame from the lab frame
+         velocity and then add Sstar in interface frame */
+      UstarL[1] = WL[1] + (Sstar - uL) * n[0];
+      UstarL[2] = WL[2] + (Sstar - uL) * n[1];
+      UstarL[3] = WL[3] + (Sstar - uL) * n[2];
+      UstarL[4] = eL + (Sstar - uL) * (Sstar + WL[4] / (WL[0] * (SL - uL)));
+      UstarL[0] *= WL[0] * (SL - uL) / (SL - Sstar);
+      UstarL[1] *= WL[0] * (SL - uL) / (SL - Sstar);
+      UstarL[2] *= WL[0] * (SL - uL) / (SL - Sstar);
+      UstarL[3] *= WL[0] * (SL - uL) / (SL - Sstar);
+      UstarL[4] *= WL[0] * (SL - uL) / (SL - Sstar);
+      totflux[0] += SL * (UstarL[0] - WL[0]);
+      totflux[1] += SL * (UstarL[1] - WL[0] * WL[1]);
+      totflux[2] += SL * (UstarL[2] - WL[0] * WL[2]);
+      totflux[3] += SL * (UstarL[3] - WL[0] * WL[3]);
+      totflux[4] += SL * (UstarL[4] - WL[0] * eL);
+    }
+  } else {
+    /* flux FR */
+    totflux[0] = WR[0] * uR;
+    totflux[1] = WR[0] * WR[1] * uR + WR[4] * n[0];
+    totflux[2] = WR[0] * WR[2] * uR + WR[4] * n[1];
+    totflux[3] = WR[0] * WR[3] * uR + WR[4] * n[2];
+    v2 = WR[1] * WR[1] + WR[2] * WR[2] + WR[3] * WR[3];
+    eR = WR[4] / (const_hydro_gamma - 1.) / WR[0] + 0.5 * v2;
+    totflux[4] = WR[0] * eR * uR + WR[4] * uR;
+    if (SR > 0.) {
+      /* add flux FstarR */
+      UstarR[0] = 1.;
+      UstarR[1] = WR[1] + (Sstar - uR) * n[0];
+      UstarR[2] = WR[2] + (Sstar - uR) * n[1];
+      UstarR[3] = WR[3] + (Sstar - uR) * n[2];
+      UstarR[4] = eR + (Sstar - uR) * (Sstar + WR[4] / (WR[0] * (SR - uR)));
+      UstarR[0] *= WR[0] * (SR - uR) / (SR - Sstar);
+      UstarR[1] *= WR[0] * (SR - uR) / (SR - Sstar);
+      UstarR[2] *= WR[0] * (SR - uR) / (SR - Sstar);
+      UstarR[3] *= WR[0] * (SR - uR) / (SR - Sstar);
+      UstarR[4] *= WR[0] * (SR - uR) / (SR - Sstar);
+      totflux[0] += SR * (UstarR[0] - WR[0]);
+      totflux[1] += SR * (UstarR[1] - WR[0] * WR[1]);
+      totflux[2] += SR * (UstarR[2] - WR[0] * WR[2]);
+      totflux[3] += SR * (UstarR[3] - WR[0] * WR[3]);
+      totflux[4] += SR * (UstarR[4] - WR[0] * eR);
+    }
+  }
+
+  /* deboost to lab frame
+     we add the flux contribution due to the movement of the interface
+     the density flux is unchanged
+     we add the extra velocity flux due to the absolute motion of the fluid
+     similarly, we need to add the energy fluxes due to the absolute motion */
+  v2 = vij[0] * vij[0] + vij[1] * vij[1] + vij[2] * vij[2];
+  totflux[1] += vij[0] * totflux[0];
+  totflux[2] += vij[1] * totflux[0];
+  totflux[3] += vij[2] * totflux[0];
+  totflux[4] += vij[0] * totflux[1] + vij[1] * totflux[2] +
+                vij[2] * totflux[3] + 0.5 * v2 * totflux[0];
+}
+
+#endif /* SWIFT_RIEMANN_HLLC_H */
diff --git a/src/riemann/riemann_trrs.h b/src/riemann/riemann_trrs.h
new file mode 100644
index 0000000000000000000000000000000000000000..56f7feadae6ea89cec742e298a269e787d58e7b3
--- /dev/null
+++ b/src/riemann/riemann_trrs.h
@@ -0,0 +1,144 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Coypright (c) 2015 Bert Vandenbroucke (bert.vandenbroucke@ugent.be)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#ifndef SWIFT_RIEMANN_TRRS_H
+#define SWIFT_RIEMANN_TRRS_H
+
+/* frequently used combinations of const_hydro_gamma */
+#define const_riemann_gp1d2g \
+  (0.5f * (const_hydro_gamma + 1.0f) / const_hydro_gamma)
+#define const_riemann_gm1d2g \
+  (0.5f * (const_hydro_gamma - 1.0f) / const_hydro_gamma)
+#define const_riemann_gm1dgp1 \
+  ((const_hydro_gamma - 1.0f) / (const_hydro_gamma + 1.0f))
+#define const_riemann_tdgp1 (2.0f / (const_hydro_gamma + 1.0f))
+#define const_riemann_tdgm1 (2.0f / (const_hydro_gamma - 1.0f))
+#define const_riemann_gm1d2 (0.5f * (const_hydro_gamma - 1.0f))
+#define const_riemann_tgdgm1 \
+  (2.0f * const_hydro_gamma / (const_hydro_gamma - 1.0f))
+#define const_riemann_ginv (1.0f / const_hydro_gamma)
+
+/**
+ * @brief Solve the Riemann problem using the Two Rarefaction Riemann Solver
+ *
+ * By assuming 2 rarefaction waves, we can analytically solve for the pressure
+ * and velocity in the intermediate region, eliminating the iterative procedure.
+ *
+ * According to Toro: "The two-rarefaction approximation is generally quite
+ * robust; (...) The TRRS is in fact exact when both non-linear waves are
+ * actually rarefaction waves."
+ *
+ * @param WL The left state vector
+ * @param WR The right state vector
+ * @param Whalf Empty state vector in which the result will be stored
+ * @param n_unit Normal vector of the interface
+ */
+__attribute__((always_inline)) INLINE static void riemann_solver_solve(
+    GFLOAT* WL, GFLOAT* WR, GFLOAT* Whalf, float* n_unit) {
+  GFLOAT aL, aR;
+  GFLOAT PLR;
+  GFLOAT vL, vR;
+  GFLOAT ustar, pstar;
+  GFLOAT vhalf;
+  GFLOAT pdpR, SHR, STR;
+  GFLOAT pdpL, SHL, STL;
+
+  /* calculate the velocities along the interface normal */
+  vL = WL[1] * n_unit[0] + WL[2] * n_unit[1] + WL[3] * n_unit[2];
+  vR = WR[1] * n_unit[0] + WR[2] * n_unit[1] + WR[3] * n_unit[2];
+
+  /* calculate the sound speeds */
+  aL = sqrtf(const_hydro_gamma * WL[4] / WL[0]);
+  aR = sqrtf(const_hydro_gamma * WR[4] / WR[0]);
+
+  /* calculate the velocity and pressure in the intermediate state */
+  PLR = pow(WL[4] / WR[4], const_riemann_gm1d2g);
+  ustar = (PLR * vL / aL + vR / aR + const_riemann_tdgm1 * (PLR - 1.0f)) /
+          (PLR / aL + 1.0f / aR);
+  pstar = 0.5f * (WL[4] * pow(1.0f + const_riemann_gm1d2 / aL * (vL - ustar),
+                              const_riemann_tgdgm1) +
+                  WR[4] * pow(1.0f + const_riemann_gm1d2 / aR * (ustar - vR),
+                              const_riemann_tgdgm1));
+
+  /* sample the solution */
+  if (ustar < 0.0f) {
+    /* right state */
+    Whalf[1] = WR[1];
+    Whalf[2] = WR[2];
+    Whalf[3] = WR[3];
+    pdpR = pstar / WR[4];
+    /* always a rarefaction wave, that's the approximation */
+    SHR = vR + aR;
+    if (SHR > 0.0f) {
+      STR = ustar + aR * pow(pdpR, const_riemann_gm1d2g);
+      if (STR <= 0.0f) {
+        Whalf[0] =
+            WR[0] * pow(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
+                        const_riemann_tdgm1);
+        vhalf = const_riemann_tdgp1 * (-aR + const_riemann_gm1d2 * vR) - vR;
+        Whalf[4] =
+            WR[4] * pow(const_riemann_tdgp1 - const_riemann_gm1dgp1 / aR * vR,
+                        const_riemann_tgdgm1);
+      } else {
+        Whalf[0] = WR[0] * pow(pdpR, const_riemann_ginv);
+        vhalf = ustar - vR;
+        Whalf[4] = pstar;
+      }
+    } else {
+      Whalf[0] = WR[0];
+      vhalf = 0.0f;
+      Whalf[4] = WR[4];
+    }
+  } else {
+    /* left state */
+    Whalf[1] = WL[1];
+    Whalf[2] = WL[2];
+    Whalf[3] = WL[3];
+    pdpL = pstar / WL[4];
+    /* rarefaction wave */
+    SHL = vL - aL;
+    if (SHL < 0.0f) {
+      STL = ustar - aL * pow(pdpL, const_riemann_gm1d2g);
+      if (STL > 0.0f) {
+        Whalf[0] =
+            WL[0] * pow(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
+                        const_riemann_tdgm1);
+        vhalf = const_riemann_tdgp1 * (aL + const_riemann_gm1d2 * vL) - vL;
+        Whalf[4] =
+            WL[4] * pow(const_riemann_tdgp1 + const_riemann_gm1dgp1 / aL * vL,
+                        const_riemann_tgdgm1);
+      } else {
+        Whalf[0] = WL[0] * pow(pdpL, const_riemann_ginv);
+        vhalf = ustar - vL;
+        Whalf[4] = pstar;
+      }
+    } else {
+      Whalf[0] = WL[0];
+      vhalf = 0.0f;
+      Whalf[4] = WL[4];
+    }
+  }
+
+  /* add the velocity solution along the interface normal to the velocities */
+  Whalf[1] += vhalf * n_unit[0];
+  Whalf[2] += vhalf * n_unit[1];
+  Whalf[3] += vhalf * n_unit[2];
+}
+
+#endif /* SWIFT_RIEMANN_TRRS_H */
diff --git a/src/runner_doiact.h b/src/runner_doiact.h
index 5b29576c5abf3d5d60dec05c16e1f20c52d86834..90bd2bb8a7a618f8539d98b91ffdf347f17eca17 100644
--- a/src/runner_doiact.h
+++ b/src/runner_doiact.h
@@ -958,8 +958,8 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) {
   struct engine *restrict e = r->e;
   int pid, pjd, k, sid;
   double rshift, shift[3] = {0.0, 0.0, 0.0};
-  struct entry *restrict sort_i, *restrict sort_j;
-  struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
+  struct entry *sort_i, *sort_j;
+  struct entry *sortdt_i = NULL, *sortdt_j = NULL;
   int countdt_i = 0, countdt_j = 0;
   struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
   double pix[3], pjx[3], di, dj;
diff --git a/src/scheduler.c b/src/scheduler.c
index 35f1504e25dec140d8746ec10bc644fb44f5374d..047a402abbdc6877958ea8e2730472cf0a995ebe 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -192,7 +192,7 @@ void scheduler_splittasks(struct scheduler *s) {
           t->ci = ci->progeny[k];
           for (k += 1; k < 8; k++)
             if (ci->progeny[k] != NULL)
-              scheduler_addtask(s, task_type_self, task_subtype_density, 0, 0,
+              scheduler_addtask(s, task_type_self, t->subtype, 0, 0,
                                 ci->progeny[k], NULL, 0);
 
           /* Make a task for each pair of progeny. */
@@ -200,9 +200,8 @@ void scheduler_splittasks(struct scheduler *s) {
             if (ci->progeny[j] != NULL)
               for (k = j + 1; k < 8; k++)
                 if (ci->progeny[k] != NULL)
-                  scheduler_addtask(s, task_type_pair, task_subtype_density,
-                                    pts[j][k], 0, ci->progeny[j],
-                                    ci->progeny[k], 0);
+                  scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0,
+                                    ci->progeny[j], ci->progeny[k], 0);
         }
       }