To run 4 MPI ranks on a KNL system configured in SNC-4 cluster mode with MCDRAM in flat memory mode, you can use the following commands:
# Clear the Hydra bootstrap setting so that MPI can be run locally.
unset I_MPI_HYDRA_BOOTSTRAP
# Place both user-allocated memory and MPI memory in the matching sub-NUMA
# region of MCDRAM (first value: user memory, second value: MPI memory).
export I_MPI_HBW_POLICY="hbw_preferred,hbw_preferred"
# Give every MPI rank a sub-NUMA region of its own, so that a rank only sees
# 16 cores when it pins its threads.
export I_MPI_PIN_DOMAIN="numa"
# Have MPI print debug information.
export I_MPI_DEBUG="5"