diff --git a/AUTHORS b/AUTHORS index 4a841f813ede6727a53861037be7fbb8aed021b2..32597ffd146795656286b9528656ab013c519c30 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,2 +1,5 @@ -Pedro Gonnet: Main author. -Matthieu Schaller: TBA. +Pedro Gonnet gonnet@google.com +Matthieu Schaller matthieu.schaller@durham.ac.uk +Aidan Chalk aidan.chalk@durham.ac.uk +Peter W. Draper p.w.draper@durham.ac.uk +Bert Vandenbrouck bert.vandenbroucke@gmail.com diff --git a/INSTALL.swift b/INSTALL.swift index 758a5a517fb796cdbde4dd89afabef0403355cdc..33c14049b640bee7e1d7be4aaca435dc18b7fda0 100644 --- a/INSTALL.swift +++ b/INSTALL.swift @@ -58,10 +58,36 @@ for instance. GCC address sanitizer flags can be included using the option. Note this requires a GCC compiler version of at least 4.8. -Dependencies: needs to be filled in... +SWIFT currently requires a compiler with OpenMP support. - + Dependencies + ============ +SWIFT depends on a number of thirdparty libraries that should be available +before you can build it. +HDF5: a HDF5 library is required to read and write particle data. One of the +commands "h5cc" or "h5pcc" should be available. If "h5pcc" is located them a +parallel HDF5 built for the version of MPI located should be provided. If +the command is not available then it can be located using the "--with-hfd5" +configure option. The value should be the full path to the "h5cc" or "h5pcc" +commands. + + +MPI: an optional MPI library that fully supports MPI_THREAD_MULTIPLE. +Before running configure the "mpirun" command should be available in the +shell. If your command isn't called "mpirun" then define the "MPIRUN" +environment variable, either in the shell or when running configure. + + +METIS: a build of the METIS library can be optionally used to optimize the +load between MPI nodes (requires an MPI library). This should be found in the +standard installation directories, or pointed at using the "--with-metis" +configuration option. In this case the top-level installation directory of +the METIS build should be given. Note to use METIS you should at least supply +"--with-metis". + + +DOXYGEN: the doxygen library is required to create the SWIFT API documentation. diff --git a/README b/README index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..279ee92eab9774e8e6a10d24283b79dfceb65bdb 100644 --- a/README +++ b/README @@ -0,0 +1,3 @@ +This is SWIFT. See INSTALL.swift for instructions. + +www.swiftsim.com diff --git a/configure.ac b/configure.ac index 4cfda7aaac6609dcee6f154d0625b57bade744b1..8ff520fd42861b2b7267ed80973bd765a14fb13b 100644 --- a/configure.ac +++ b/configure.ac @@ -57,15 +57,21 @@ AC_ARG_ENABLE([mpi], [enable_mpi="$enableval"], [enable_mpi="yes"] ) +good_mpi="yes" if test "$enable_mpi" = "yes"; then AX_MPI([CC="$MPICC" AC_DEFINE(HAVE_MPI, 1, [Define if you have the MPI library.]) ]) # Various MPI implementations require additional libraries when also using # threads. Use mpirun (on PATH) as that seems to be only command with - # version flag. - AC_PATH_PROG([MPIRUN],[mpirun],[notfound]) + # version flag, allow MPIRUN to override for systems that insist on + # a non-standard name (PRACE). + : ${MPIRUN='mpirun'} + if test "$MPIRUN" = "mpirun"; then + AC_PATH_PROG([MPIRUN],[mpirun],[notfound]) + fi if test "$MPIRUN" = "notfound"; then AC_MSG_WARN([Cannot find mpirun command on PATH, thread support may not be correct]) + enable_mpi="no" else # Special options we know about. # Intel: -mt_mpi @@ -86,6 +92,15 @@ if test "$enable_mpi" = "yes"; then *"Open MPI"*) MPI_THREAD_LIBS="" AC_MSG_RESULT([Open MPI]) + # OpenMPI should be 1.8.6 or later, if not complain. + # Version is last word on first line of -version output. + revision=`mpirun -version 2>&1 | grep "Open MPI" | awk '{print $NF}'` + AX_COMPARE_VERSION( $revision, [ge], [1.8.6],,[good_mpi="no"] ) + if test "$good_mpi" = "no"; then + AC_MSG_WARN([ + Open MPI version should be at least 1.8.6 (is $revision)]) + enable_mpi="yes (but with warning)" + fi ;; *) MPI_THREAD_LIBS="" @@ -95,7 +110,10 @@ if test "$enable_mpi" = "yes"; then AC_SUBST([MPI_THREAD_LIBS]) fi fi -AM_CONDITIONAL([HAVEMPI],[test -n "$MPICC"]) +AM_CONDITIONAL([HAVEMPI],[test $enable_mpi = "yes"]) + +# Indicate that MPIRUN can be modified by an environement variable +AC_ARG_VAR(MPIRUN, Path to the mpirun command if non-standard) # Need C99 and inline support. AC_PROG_CC_C99 @@ -170,39 +188,31 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS" or use CPPFLAGS and LDFLAGS if the library is installed in a non-standard location.])) -# Check for OpenMP. -AC_OPENMP -AC_SUBST(OPENMP_CFLAGS) -enable_openmp="no" -if test -z "$OPENMP_CFLAGS"; then - echo $OPENMP_CFLAGS - AC_MSG_ERROR(Compiler does not support OpenMP, 1) -else - CFLAGS="$CFLAGS $OPENMP_CFLAGS" - enable_openmp="yes" -fi # Check for metis. Note AX_LIB_METIS exists, but cannot be configured # to be default off (i.e. given no option it tries to locate METIS), so we # don't use that. AC_ARG_WITH([metis], [AS_HELP_STRING([--with-metis=PATH], - [prefix where the metis library is installed @<:@default=yes@:>@] + [root directory where metis is installed @<:@default=yes@:>@] )], [], [with_metis="no"] ) if test "x$with_metis" != "xno"; then if test "x$with_metis" != "xyes" -a "x$with_metis" != "x"; then - METIS_LIBS="-L$with_metis -lmetis" + METIS_LIBS="-L$with_metis/lib -lmetis" + METIS_INCS="-I$with_metis/include" else METIS_LIBS="-lmetis" + METIS_INCS="" fi AC_CHECK_LIB([metis],[METIS_PartGraphKway], AC_DEFINE([HAVE_METIS],1,[The metis library appears to be present.]), AC_MSG_ERROR(something is wrong with the metis library!),$METIS_LIBS) fi AC_SUBST([METIS_LIBS]) +AC_SUBST([METIS_INCS]) AM_CONDITIONAL([HAVEMETIS],[test -n "$METIS_LIBS"]) # Check for zlib. @@ -219,7 +229,7 @@ AX_LIB_HDF5 # The default is to use MPI support if it is available, i.e. this is # a parallel HDF5. # To do this need to ask the HDF5 compiler about its configuration, -# -showconfig should have yes/no. +# -showconfig should have yes/no. have_parallel_hdf5="no" if test "$with_hdf5" = "yes"; then AC_ARG_ENABLE([parallel-hdf5], @@ -284,7 +294,6 @@ AC_MSG_RESULT([ MPI enabled: $enable_mpi HDF5 enabled: $with_hdf5 parallel: $have_parallel_hdf5 - OpenMP enabled: $enable_openmp Metis enabled: $with_metis ]) diff --git a/examples/Makefile.am b/examples/Makefile.am index eff15afef3ed0597f95c38f5160178d9868abe48..0ae3cbba9ded84e94b4cbe583344064b391d1c4a 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,13 +20,13 @@ MYFLAGS = -DTIMER # Add the source directory and debug to CFLAGS -AM_CFLAGS = -Wall -Werror -I../src -DCPU_TPS=2.67e9 +AM_CFLAGS = -Wall -Werror -I../src -DCPU_TPS=2.67e9 $(HDF5_CPPFLAGS) AM_LDFLAGS = -METIS_LIBS = @METIS_LIBS@ MPI_THREAD_LIBS = @MPI_THREAD_LIBS@ MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) +MPI_FLAGS = -DWITH_MPI $(METIS_INCS) # Set-up the library bin_PROGRAMS = test test_fixdt test_mindt test_single @@ -53,17 +53,17 @@ test_mindt_LDADD = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) # Sources for test_mpi test_mpi_SOURCES = test.c -test_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_multistep | engine_policy_keep" +test_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_multistep | engine_policy_keep" test_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) # Sources for test_fixdt_mpi test_fixdt_mpi_SOURCES = test.c -test_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep" +test_fixdt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_fixdt | engine_policy_keep" test_fixdt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) # Sources for test_mindt_mpi test_mindt_mpi_SOURCES = test.c -test_mindt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) -DWITH_MPI -DENGINE_POLICY="engine_policy_keep" +test_mindt_mpi_CFLAGS = $(MYFLAGS) $(AM_CFLAGS) $(MPI_FLAGS) -DENGINE_POLICY="engine_policy_keep" test_mindt_mpi_LDADD = ../src/.libs/libswiftsim_mpi.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(MPI_LIBS) # Sources for test_single diff --git a/examples/test.c b/examples/test.c index f6d5d42bbb4eb6aa3461392adec877867bb372d9..188b20c4118bf6aec607566ff9859803892b9d9a 100644 --- a/examples/test.c +++ b/examples/test.c @@ -1,7 +1,8 @@ /******************************************************************************* * This file is part of SWIFT. - * Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), + * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), * Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * 2015 Peter W. Draper (p.w.draper@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -31,7 +32,6 @@ #include <float.h> #include <limits.h> #include <fenv.h> -#include <omp.h> /* Conditional headers. */ #ifdef HAVE_LIBZ @@ -270,7 +270,6 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri double rho_max = 0.0, rho_min = 100; /* Loop over all particle pairs. */ - #pragma omp parallel for schedule(dynamic), default(none), private(k,i,dx,r2), shared(periodic,parts,dim,N,stdout) for ( j = 0 ; j < N ; j++ ) { if ( j % 1000 == 0 ) { printf( "pairs_n2: j=%i.\n" , j ); @@ -290,13 +289,11 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri if ( r2 < parts[j].h*parts[j].h || r2 < parts[k].h*parts[k].h ) { runner_iact_density( r2 , NULL , parts[j].h , parts[k].h , &parts[j] , &parts[k] ); /* if ( parts[j].h / parts[k].h > maxratio ) - #pragma omp critical { maxratio = parts[j].h / parts[k].h; mj = j; mk = k; } else if ( parts[k].h / parts[j].h > maxratio ) - #pragma omp critical { maxratio = parts[k].h / parts[j].h; mj = j; mk = k; @@ -524,6 +521,23 @@ void density_dump ( int N ) { } +/** + * Factorize a given integer, attempts to keep larger pair of factors. + */ +void factor( int value, int *f1, int *f2 ) { + int j; + int i; + + j = (int) sqrt( value ); + for ( i = j; i > 0; i-- ) { + if ( ( value % i ) == 0 ) { + *f1 = i; + *f2 = value / i; + break; + } + } + } + /** * @brief Main routine that loads a few particles and generates some output. @@ -570,14 +584,21 @@ int main ( int argc , char *argv[] ) { if ( myrank == 0 ) message( "MPI is up and running with %i nodes." , nr_nodes ); fflush(stdout); + + /* Set a default grid so that grid[0]*grid[1]*grid[2] == nr_nodes. */ + factor( nr_nodes, &grid[0], &grid[1] ); + factor( nr_nodes / grid[1], &grid[0], &grid[2] ); + factor( grid[0] * grid[1], &grid[1], &grid[0] ); #endif /* Greeting message */ - message( "This is %s\n", package_description() ); + if ( myrank == 0 ) + message( "This is %s\n", package_description() ); /* Init the space. */ bzero( &s , sizeof(struct space) ); + /* Parse the options */ while ( ( c = getopt( argc , argv , "a:c:d:f:g:m:q:r:s:t:w:z:" ) ) != -1 ) switch( c ) @@ -609,8 +630,6 @@ int main ( int argc , char *argv[] ) { case 'g': if ( sscanf( optarg , "%i %i %i" , &grid[0] , &grid[1] , &grid[2] ) != 3 ) error( "Error parsing grid." ); - if ( myrank == 0 ) - message( "grid set to [ %i %i %i ]." , grid[0] , grid[1] , grid[2] ); fflush(stdout); break; case 'm': if ( sscanf( optarg , "%lf" , &h_max ) != 1 ) @@ -635,7 +654,6 @@ int main ( int argc , char *argv[] ) { case 't': if ( sscanf( optarg , "%d" , &nr_threads ) != 1 ) error( "Error parsing number of threads." ); - omp_set_num_threads( nr_threads ); break; case 'w': if ( sscanf( optarg , "%d" , &space_subsize ) != 1 ) @@ -654,7 +672,11 @@ int main ( int argc , char *argv[] ) { break; } - + +#if defined( WITH_MPI ) + if ( myrank == 0 ) + message( "grid set to [ %i %i %i ]." , grid[0] , grid[1] , grid[2] ); fflush(stdout); +#endif /* How large are the parts? */ if ( myrank == 0 ) { diff --git a/examples/test_single.c b/examples/test_single.c index 9387f826acf4e43ed8aebc6e9242a9ac8afbee15..36c219eebf214feaf775283834578c5a487d5e90 100644 --- a/examples/test_single.c +++ b/examples/test_single.c @@ -31,7 +31,6 @@ #include <float.h> #include <limits.h> #include <fenv.h> -#include <omp.h> /* Conditional headers. */ #ifdef HAVE_LIBZ @@ -63,6 +62,9 @@ int main ( int argc , char *argv[] ) { struct part p1, p2; float x, w, dwdx, r2, dx[3] = { 0.0f , 0.0f , 0.0f }, gradw[3]; + /* Greeting message */ + printf( "This is %s\n", package_description() ); + /* Init the particles. */ for ( k = 0 ; k < 3 ; k++ ) { p1.a[k] = 0.0f; p1.v[k] = 0.0f; p1.x[k] = 0.0; diff --git a/src/Makefile.am b/src/Makefile.am index 99ebb246b99a620b0207784b132e0a8275a12d36..78f29fe5b87b0ef8ef6a0948039df2ca52e712aa 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -17,7 +17,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the debug flag to the whole thing -AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 +AM_CFLAGS = -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 $(HDF5_CPPFLAGS) # Assign a "safe" version number AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) $(HDF5_LDFLAGS) -version-info 0:0:0 # -fsanitize=address @@ -52,17 +52,24 @@ libswiftsim_la_SOURCES = $(AM_SOURCES) # Sources and flags for MPI library libswiftsim_mpi_la_SOURCES = $(AM_SOURCES) -libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI +libswiftsim_mpi_la_CFLAGS = $(AM_CFLAGS) -DWITH_MPI $(METIS_INCS) libswiftsim_mpi_la_SHORTNAME = mpi # Versioning. If any sources change then update the version.h file with # the current git revision and package version. +# May have a checkout without a version.h file and no git command (tar/zip +# download), allow that, but make sure we know it. version.h: version.h.in $(AM_SOURCES) $(include_HEADERS) $(noinst_HEADERS) if test "X$(GIT_CMD)" != "X"; then \ GIT_REVISION=`git describe --abbrev=8 --always --tags --dirty`; \ sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \ -e "s,@GIT_REVISION\@,$${GIT_REVISION}," version.h.in > version.h; \ + else \ + if test ! -f version.h; then \ + sed -e "s,@PACKAGE_VERSION\@,$(PACKAGE_VERSION)," \ + -e "s,@GIT_REVISION\@,unknown," version.h.in > version.h; \ + fi; \ fi # Make sure version.h is built first. diff --git a/src/engine.c b/src/engine.c index f730f0fe4784ecaf830f088886bf25c7d335c873..88d9547b83a10327a6a7183f13f5bb499da43948 100644 --- a/src/engine.c +++ b/src/engine.c @@ -23,8 +23,6 @@ /* Some standard headers. */ #include <float.h> #include <limits.h> -#include <math.h> -#include <omp.h> #include <sched.h> #include <stdio.h> #include <stdlib.h> @@ -34,12 +32,11 @@ /* MPI headers. */ #ifdef WITH_MPI #include <mpi.h> -#endif - -/* METIS headers. */ +/* METIS headers only used when MPI is also available. */ #ifdef HAVE_METIS #include <metis.h> #endif +#endif /* This object's header. */ #include "engine.h" diff --git a/src/proxy.c b/src/proxy.c index 447a7a90be24d8dcb196d73ed62611464d5f3c2d..bafa185cdcaf2100992398657b650a954daceb91 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -23,9 +23,6 @@ /* Some standard headers. */ #include <float.h> #include <limits.h> -#include <math.h> -#include <omp.h> -#include <pthread.h> #include <sched.h> #include <stdio.h> #include <stdlib.h> diff --git a/src/runner.c b/src/runner.c index 173215a6a53a0627408f3b5b61d0805fee597e2d..a3056f414bd352c2a9b857b980e7989dfc28130a 100644 --- a/src/runner.c +++ b/src/runner.c @@ -23,12 +23,6 @@ /* Some standard headers. */ #include <float.h> #include <limits.h> -#include <math.h> -#include <omp.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> /* MPI headers. */ #ifdef WITH_MPI diff --git a/src/scheduler.c b/src/scheduler.c index bbcac93be24066e05878446eba7c60a5d80f4bf8..02defc31710e3ab0de067cf359d4281b94c17d90 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -23,7 +23,6 @@ /* Some standard headers. */ #include <limits.h> #include <math.h> -#include <omp.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> @@ -803,7 +802,6 @@ void scheduler_reweight(struct scheduler *s) { /* Run throught the tasks backwards and set their waits and weights. */ // tic = getticks(); - // #pragma omp parallel for schedule(static) private(t,j) for (k = nr_tasks - 1; k >= 0; k--) { t = &tasks[tid[k]]; t->weight = 0; @@ -887,7 +885,6 @@ void scheduler_start(struct scheduler *s, unsigned int mask) { /* Run throught the tasks and set their waits. */ // tic = getticks(); - // #pragma omp parallel for schedule(static) private(t,j) for (k = nr_tasks - 1; k >= 0; k--) { t = &tasks[tid[k]]; t->wait = 0; diff --git a/src/space.c b/src/space.c index f99f59329212a574ccacdb934133be1b3348ea61..fcdbfa24906153252f3b8a8855ca63703c648da6 100644 --- a/src/space.c +++ b/src/space.c @@ -24,10 +24,6 @@ #include <float.h> #include <limits.h> #include <math.h> -#include <omp.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> #include <string.h> /* MPI headers. */ @@ -297,12 +293,12 @@ void space_regrid(struct space *s, double cell_max) { void space_rebuild(struct space *s, double cell_max) { - int j, k, cdim[3], nr_gparts = s->nr_gparts; + int j, k, cdim[3], nr_parts = s->nr_parts, nr_gparts = s->nr_gparts; struct cell *restrict c, *restrict cells; struct part *restrict finger, *restrict p, *parts = s->parts; struct xpart *xfinger, *xparts = s->xparts; struct gpart *gp, *gparts = s->gparts, *gfinger; - int *cell_id; + int *ind; double ih[3], dim[3]; // ticks tic; @@ -315,8 +311,8 @@ void space_rebuild(struct space *s, double cell_max) { /* Run through the particles and get their cell index. */ // tic = getticks(); - const int cell_id_size = s->nr_parts; - if ((cell_id = (int *)malloc(sizeof(int) * cell_id_size)) == NULL) + const int ind_size = s->size_parts; + if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); ih[0] = s->ih[0]; ih[1] = s->ih[1]; @@ -327,18 +323,16 @@ void space_rebuild(struct space *s, double cell_max) { cdim[0] = s->cdim[0]; cdim[1] = s->cdim[1]; cdim[2] = s->cdim[2]; - for (k = 0; k < s->nr_parts; k++) { + for (k = 0; k < nr_parts; k++) { p = &parts[k]; for (j = 0; j < 3; j++) if (p->x[j] < 0.0) p->x[j] += dim[j]; else if (p->x[j] >= dim[j]) p->x[j] -= dim[j]; - cell_id[k] = + ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - if (cell_id[k] < 0 || cell_id[k] >= s->nr_cells) - error("Bad cell id %i.", cell_id[k]); - atomic_inc(&cells[cell_id[k]].count); + cells[ind[k]].count++; } // message( "getting particle indices took %.3f ms." , (double)(getticks() - // tic) / CPU_TPS * 1000 ); @@ -346,80 +340,78 @@ void space_rebuild(struct space *s, double cell_max) { #ifdef WITH_MPI /* Move non-local parts to the end of the list. */ int nodeID = s->e->nodeID; - int nr_local_parts = s->nr_parts; - for (k = 0; k < nr_local_parts; k++) - if (cells[cell_id[k]].nodeID != nodeID) { - cells[cell_id[k]].count -= 1; - nr_local_parts -= 1; + for (k = 0; k < nr_parts; k++) + if (cells[ind[k]].nodeID != nodeID) { + cells[ind[k]].count -= 1; + nr_parts -= 1; struct part tp = parts[k]; - parts[k] = parts[nr_local_parts]; - parts[nr_local_parts] = tp; + parts[k] = parts[nr_parts]; + parts[nr_parts] = tp; struct xpart txp = xparts[k]; - xparts[k] = xparts[nr_local_parts]; - xparts[nr_local_parts] = txp; - int t = cell_id[k]; - cell_id[k] = cell_id[nr_local_parts]; - cell_id[nr_local_parts] = t; + xparts[k] = xparts[nr_parts]; + xparts[nr_parts] = txp; + int t = ind[k]; + ind[k] = ind[nr_parts]; + ind[nr_parts] = t; } /* Exchange the strays, note that this potentially re-allocates the parts arrays. */ s->nr_parts = - nr_local_parts + engine_exchange_strays(s->e, nr_local_parts, - &cell_id[nr_local_parts], - s->nr_parts - nr_local_parts); + nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], + s->nr_parts - nr_parts); parts = s->parts; xparts = s->xparts; /* Re-allocate the index array if needed.. */ - if (s->nr_parts > cell_id_size) { - int *cell_id_new; - if ((cell_id_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) + if (s->nr_parts > ind_size) { + int *ind_new; + if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate temporary particle indices."); - memcpy(cell_id_new, cell_id, sizeof(int) * nr_local_parts); - free(cell_id); - cell_id = cell_id_new; + memcpy(ind_new, ind, sizeof(int) * nr_parts); + free(ind); + ind = ind_new; } /* Assign each particle to its cell. */ - for (k = nr_local_parts; k < s->nr_parts; k++) { + for (k = nr_parts; k < s->nr_parts; k++) { p = &parts[k]; - cell_id[k] = + ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[cell_id[k]].count += 1; - if (cells[cell_id[k]].nodeID != nodeID) - error( - "Received part that does not belong to me (nodeID=%i, x=[%e,%e,%e]).", - cells[cell_id[k]].nodeID, p->x[0], p->x[1], p->x[2]); + cells[ind[k]].count += 1; + /* if ( cells[ ind[k] ].nodeID != nodeID ) + error( "Received part that does not belong to me (nodeID=%i)." , cells[ + ind[k] ].nodeID ); */ } + nr_parts = s->nr_parts; #endif /* Sort the parts according to their cells. */ // tic = getticks(); - parts_sort(parts, xparts, cell_id, s->nr_parts, 0, s->nr_cells - 1); + parts_sort(parts, xparts, ind, nr_parts, 0, s->nr_cells - 1); // message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // * 1000 ); /* Re-link the gparts. */ - for (k = 0; k < s->nr_parts; k++) + for (k = 0; k < nr_parts; k++) if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k]; /* Verify sort. */ /* for ( k = 1 ; k < nr_parts ; k++ ) { - if ( cell_id[k-1] > cell_id[k] ) { + if ( ind[k-1] > ind[k] ) { error( "Sort failed!" ); } - else if ( cell_id[k] != cell_getid( cdim , parts[k].x[0]*ih[0] , + else if ( ind[k] != cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ) error( "Incorrect indices!" ); } */ /* We no longer need the indices as of here. */ - free(cell_id); + free(ind); /* Run through the gravity particles and get their cell index. */ // tic = getticks(); - if ((cell_id = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL) + if ((ind = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL) error("Failed to allocate temporary particle indices."); for (k = 0; k < nr_gparts; k++) { gp = &gparts[k]; @@ -428,9 +420,9 @@ void space_rebuild(struct space *s, double cell_max) { gp->x[j] += dim[j]; else if (gp->x[j] >= dim[j]) gp->x[j] -= dim[j]; - cell_id[k] = + ind[k] = cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - atomic_inc(&cells[cell_id[k]].gcount); + cells[ind[k]].gcount++; } // message( "getting particle indices took %.3f ms." , (double)(getticks() - // tic) / CPU_TPS * 1000 ); @@ -439,7 +431,7 @@ void space_rebuild(struct space *s, double cell_max) { /* Sort the parts according to their cells. */ // tic = getticks(); - gparts_sort(gparts, cell_id, nr_gparts, 0, s->nr_cells - 1); + gparts_sort(gparts, ind, nr_gparts, 0, s->nr_cells - 1); // message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // * 1000 ); @@ -448,7 +440,7 @@ void space_rebuild(struct space *s, double cell_max) { if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k]; /* We no longer need the indices as of here. */ - free(cell_id); + free(ind); /* Hook the cells up to the parts. */ // tic = getticks(); @@ -470,17 +462,8 @@ void space_rebuild(struct space *s, double cell_max) { /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ // tic = getticks(); - k = 0; - { - if (omp_get_thread_num() < 8) - while (1) { - int myk = atomic_inc(&k); - if (myk < s->nr_cells) - space_split(s, &cells[myk]); - else - break; - } - } + for (k = 0; k < s->nr_cells; k++) space_split(s, &cells[k]); + // message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // * 1000 ); } @@ -532,130 +515,105 @@ void parts_sort(struct part *parts, struct xpart *xparts, int *ind, int N, last = 1; waiting = 1; -/* Parallel bit. */ -#pragma omp parallel default(shared) \ - shared(N, first, last, waiting, qstack, parts, xparts, ind, qstack_size, \ - stderr, engine_rank) private(pivot, i, ii, j, jj, min, max, temp_i, \ - qid, temp_xp, temp_p) - { + /* Main loop. */ + while (waiting > 0) { - /* Main loop. */ - if (omp_get_thread_num() < 8) - while (waiting > 0) { - - /* Grab an interval off the queue. */ - qid = atomic_inc(&first) % qstack_size; - - /* Wait for the interval to be ready. */ - while (waiting > 0 && atomic_cas(&qstack[qid].ready, 1, 1) != 1) - ; - - /* Broke loop for all the wrong reasons? */ - if (waiting == 0) break; - - /* Get the stack entry. */ - i = qstack[qid].i; - j = qstack[qid].j; - min = qstack[qid].min; - max = qstack[qid].max; - qstack[qid].ready = 0; - // message( "thread %i got interval [%i,%i] with values in [%i,%i]." , - // omp_get_thread_num() , i , j , min , max ); - - /* Loop over sub-intervals. */ - while (1) { - - /* Bring beer. */ - pivot = (min + max) / 2; - - /* One pass of QuickSort's partitioning. */ - ii = i; - jj = j; - while (ii < jj) { - while (ii <= j && ind[ii] <= pivot) ii++; - while (jj >= i && ind[jj] > pivot) jj--; - if (ii < jj) { - temp_i = ind[ii]; - ind[ii] = ind[jj]; - ind[jj] = temp_i; - temp_p = parts[ii]; - parts[ii] = parts[jj]; - parts[jj] = temp_p; - temp_xp = xparts[ii]; - xparts[ii] = xparts[jj]; - xparts[jj] = temp_xp; - } - } + /* Grab an interval off the queue. */ + qid = (first++) % qstack_size; - /* Verify sort. */ - /* for ( int k = i ; k <= jj ; k++ ) - if ( ind[k] > pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, - j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (<=pivot)." ); - } - for ( int k = jj+1 ; k <= j ; k++ ) - if ( ind[k] <= pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, - j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (>pivot)." ); - } */ - - /* Split-off largest interval. */ - if (jj - i > j - jj + 1) { - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - qid = atomic_inc(&last) % qstack_size; - while (atomic_cas(&qstack[qid].ready, 0, 0) != 0) - ; - qstack[qid].i = i; - qstack[qid].j = jj; - qstack[qid].min = min; - qstack[qid].max = pivot; - qstack[qid].ready = 1; - if (atomic_inc(&waiting) >= qstack_size) - error("Qstack overflow."); - } - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - i = jj + 1; - min = pivot + 1; - } else - break; - - } else { - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - qid = atomic_inc(&last) % qstack_size; - while (atomic_cas(&qstack[qid].ready, 0, 0) != 0) - ; - qstack[qid].i = jj + 1; - qstack[qid].j = j; - qstack[qid].min = pivot + 1; - qstack[qid].max = max; - qstack[qid].ready = 1; - if (atomic_inc(&waiting) >= qstack_size) - error("Qstack overflow."); - } - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - j = jj; - max = pivot; - } else - break; - } + /* Get the stack entry. */ + i = qstack[qid].i; + j = qstack[qid].j; + min = qstack[qid].min; + max = qstack[qid].max; + qstack[qid].ready = 0; - } /* loop over sub-intervals. */ + /* Loop over sub-intervals. */ + while (1) { - atomic_dec(&waiting); + /* Bring beer. */ + pivot = (min + max) / 2; + + /* One pass of QuickSort's partitioning. */ + ii = i; + jj = j; + while (ii < jj) { + while (ii <= j && ind[ii] <= pivot) ii++; + while (jj >= i && ind[jj] > pivot) jj--; + if (ii < jj) { + temp_i = ind[ii]; + ind[ii] = ind[jj]; + ind[jj] = temp_i; + temp_p = parts[ii]; + parts[ii] = parts[jj]; + parts[jj] = temp_p; + temp_xp = xparts[ii]; + xparts[ii] = xparts[jj]; + xparts[jj] = temp_xp; + } + } - } /* main loop. */ + /* Verify sort. */ + /* for ( int k = i ; k <= jj ; k++ ) + if ( ind[k] > pivot ) { + message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, + N=%i." , k , ind[k] , pivot , i , j , N ); + error( "Partition failed (<=pivot)." ); + } + for ( int k = jj+1 ; k <= j ; k++ ) + if ( ind[k] <= pivot ) { + message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, + N=%i." , k , ind[k] , pivot , i , j , N ); + error( "Partition failed (>pivot)." ); + } */ + + /* Split-off largest interval. */ + if (jj - i > j - jj + 1) { + + /* Recurse on the left? */ + if (jj > i && pivot > min) { + qid = (last++) % qstack_size; + qstack[qid].i = i; + qstack[qid].j = jj; + qstack[qid].min = min; + qstack[qid].max = pivot; + qstack[qid].ready = 1; + if (waiting++ >= qstack_size) error("Qstack overflow."); + } - } /* parallel bit. */ + /* Recurse on the right? */ + if (jj + 1 < j && pivot + 1 < max) { + i = jj + 1; + min = pivot + 1; + } else + break; + + } else { + + /* Recurse on the right? */ + if (jj + 1 < j && pivot + 1 < max) { + qid = (last++) % qstack_size; + qstack[qid].i = jj + 1; + qstack[qid].j = j; + qstack[qid].min = pivot + 1; + qstack[qid].max = max; + qstack[qid].ready = 1; + if ((waiting++) >= qstack_size) error("Qstack overflow."); + } + + /* Recurse on the left? */ + if (jj > i && pivot > min) { + j = jj; + max = pivot; + } else + break; + } + + } /* loop over sub-intervals. */ + + waiting--; + + } /* main loop. */ /* Verify sort. */ /* for ( i = 1 ; i < N ; i++ ) @@ -700,126 +658,102 @@ void gparts_sort(struct gpart *gparts, int *ind, int N, int min, int max) { last = 1; waiting = 1; -/* Parallel bit. */ -#pragma omp parallel default(shared) shared( \ - N, first, last, waiting, qstack, gparts, ind, qstack_size, stderr, \ - engine_rank) private(pivot, i, ii, j, jj, min, max, temp_i, qid, temp_p) - { + /* Main loop. */ + while (waiting > 0) { - /* Main loop. */ - if (omp_get_thread_num() < 8) - while (waiting > 0) { - - /* Grab an interval off the queue. */ - qid = atomic_inc(&first) % qstack_size; - - /* Wait for the interval to be ready. */ - while (waiting > 0 && atomic_cas(&qstack[qid].ready, 1, 1) != 1) - ; - - /* Broke loop for all the wrong reasons? */ - if (waiting == 0) break; - - /* Get the stack entry. */ - i = qstack[qid].i; - j = qstack[qid].j; - min = qstack[qid].min; - max = qstack[qid].max; - qstack[qid].ready = 0; - // message( "thread %i got interval [%i,%i] with values in [%i,%i]." , - // omp_get_thread_num() , i , j , min , max ); - - /* Loop over sub-intervals. */ - while (1) { - - /* Bring beer. */ - pivot = (min + max) / 2; - - /* One pass of QuickSort's partitioning. */ - ii = i; - jj = j; - while (ii < jj) { - while (ii <= j && ind[ii] <= pivot) ii++; - while (jj >= i && ind[jj] > pivot) jj--; - if (ii < jj) { - temp_i = ind[ii]; - ind[ii] = ind[jj]; - ind[jj] = temp_i; - temp_p = gparts[ii]; - gparts[ii] = gparts[jj]; - gparts[jj] = temp_p; - } - } + /* Grab an interval off the queue. */ + qid = (first++) % qstack_size; - /* Verify sort. */ - /* for ( int k = i ; k <= jj ; k++ ) - if ( ind[k] > pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, - j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (<=pivot)." ); - } - for ( int k = jj+1 ; k <= j ; k++ ) - if ( ind[k] <= pivot ) { - message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, - j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); - error( "Partition failed (>pivot)." ); - } */ - - /* Split-off largest interval. */ - if (jj - i > j - jj + 1) { - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - qid = atomic_inc(&last) % qstack_size; - while (atomic_cas(&qstack[qid].ready, 0, 0) != 0) - ; - qstack[qid].i = i; - qstack[qid].j = jj; - qstack[qid].min = min; - qstack[qid].max = pivot; - qstack[qid].ready = 1; - if (atomic_inc(&waiting) >= qstack_size) - error("Qstack overflow."); - } - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - i = jj + 1; - min = pivot + 1; - } else - break; - - } else { - - /* Recurse on the right? */ - if (jj + 1 < j && pivot + 1 < max) { - qid = atomic_inc(&last) % qstack_size; - while (atomic_cas(&qstack[qid].ready, 0, 0) != 0) - ; - qstack[qid].i = jj + 1; - qstack[qid].j = j; - qstack[qid].min = pivot + 1; - qstack[qid].max = max; - qstack[qid].ready = 1; - if (atomic_inc(&waiting) >= qstack_size) - error("Qstack overflow."); - } - - /* Recurse on the left? */ - if (jj > i && pivot > min) { - j = jj; - max = pivot; - } else - break; - } + /* Get the stack entry. */ + i = qstack[qid].i; + j = qstack[qid].j; + min = qstack[qid].min; + max = qstack[qid].max; + qstack[qid].ready = 0; + + /* Loop over sub-intervals. */ + while (1) { + + /* Bring beer. */ + pivot = (min + max) / 2; + + /* One pass of QuickSort's partitioning. */ + ii = i; + jj = j; + while (ii < jj) { + while (ii <= j && ind[ii] <= pivot) ii++; + while (jj >= i && ind[jj] > pivot) jj--; + if (ii < jj) { + temp_i = ind[ii]; + ind[ii] = ind[jj]; + ind[jj] = temp_i; + temp_p = gparts[ii]; + gparts[ii] = gparts[jj]; + gparts[jj] = temp_p; + } + } + + /* Verify sort. */ + /* for ( int k = i ; k <= jj ; k++ ) + if ( ind[k] > pivot ) { + message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, + N=%i." , k , ind[k] , pivot , i , j , N ); + error( "Partition failed (<=pivot)." ); + } + for ( int k = jj+1 ; k <= j ; k++ ) + if ( ind[k] <= pivot ) { + message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, + N=%i." , k , ind[k] , pivot , i , j , N ); + error( "Partition failed (>pivot)." ); + } */ + + /* Split-off largest interval. */ + if (jj - i > j - jj + 1) { + + /* Recurse on the left? */ + if (jj > i && pivot > min) { + qid = (last++) % qstack_size; + qstack[qid].i = i; + qstack[qid].j = jj; + qstack[qid].min = min; + qstack[qid].max = pivot; + qstack[qid].ready = 1; + if ((waiting++) >= qstack_size) error("Qstack overflow."); + } + + /* Recurse on the right? */ + if (jj + 1 < j && pivot + 1 < max) { + i = jj + 1; + min = pivot + 1; + } else + break; + + } else { - } /* loop over sub-intervals. */ + /* Recurse on the right? */ + if (jj + 1 < j && pivot + 1 < max) { + qid = (last++) % qstack_size; + qstack[qid].i = jj + 1; + qstack[qid].j = j; + qstack[qid].min = pivot + 1; + qstack[qid].max = max; + qstack[qid].ready = 1; + if ((waiting++) >= qstack_size) error("Qstack overflow."); + } + + /* Recurse on the left? */ + if (jj > i && pivot > min) { + j = jj; + max = pivot; + } else + break; + } - atomic_dec(&waiting); + } /* loop over sub-intervals. */ - } /* main loop. */ + waiting--; - } /* parallel bit. */ + } /* main loop. */ /* Verify sort. */ /* for ( i = 1 ; i < N ; i++ ) @@ -872,20 +806,11 @@ void space_map_parts(struct space *s, } /* Call the recursive function on all higher-level cells. */ - { - int mycid; - while (1) { - mycid = cid++; - if (mycid < s->nr_cells) - rec_map(&s->cells[mycid]); - else - break; - } - } + for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]); } /** - * @brief Map a function to all particles in a space. + * @brief Map a function to all particles in a aspace. * * @param s The #space we are working in. * @param full Map to all cells, including cells with sub-cells. @@ -912,16 +837,7 @@ void space_map_cells_post(struct space *s, int full, } /* Call the recursive function on all higher-level cells. */ - { - int mycid; - while (1) { - mycid = cid++; - if (mycid < s->nr_cells) - rec_map(&s->cells[mycid]); - else - break; - } - } + for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]); } void space_map_cells_pre(struct space *s, int full, @@ -943,16 +859,7 @@ void space_map_cells_pre(struct space *s, int full, } /* Call the recursive function on all higher-level cells. */ - { - int mycid; - while (1) { - mycid = cid++; - if (mycid < s->nr_cells) - rec_map(&s->cells[mycid]); - else - break; - } - } + for (cid = 0; cid < s->nr_cells; cid++) rec_map(&s->cells[cid]); } /** @@ -965,7 +872,7 @@ void space_map_cells_pre(struct space *s, int full, void space_split(struct space *s, struct cell *c) { int k, count = c->count, gcount = c->gcount, maxdepth = 0; - float h, h_max = 0.0f, dt, dt_min = FLT_MAX, dt_max = FLT_MIN; + float h, h_max = 0.0f, dt, dt_min = c->parts[0].dt, dt_max = dt_min; struct cell *temp; struct part *p, *parts = c->parts; struct xpart *xp, *xparts = c->xparts; diff --git a/src/task.c b/src/task.c index 19b96799c7eb2e139457e8ed9202d005228b82e0..949caab56c4c4d8a0e3c73d05014ebc5ad68657a 100644 --- a/src/task.c +++ b/src/task.c @@ -23,9 +23,6 @@ /* Some standard headers. */ #include <float.h> #include <limits.h> -#include <math.h> -#include <omp.h> -#include <pthread.h> #include <sched.h> #include <stdio.h> #include <stdlib.h>