From 7e23a15d09f4918f8112e8599a40b01562369ec5 Mon Sep 17 00:00:00 2001
From: "Peter W. Draper" <p.w.draper@durham.ac.uk>
Date: Fri, 27 Oct 2023 14:30:59 +0100
Subject: [PATCH] Add optimizations for the AMD Genoa and Bergamo chips

These have AVX512. With the Intel compilers you need to be careful not to use -x optimizations, so it isn't straightforward to get these optimizations; faking skylake-avx512 seems to work.
---
 m4/ax_cc_maxopt.m4 | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/m4/ax_cc_maxopt.m4 b/m4/ax_cc_maxopt.m4
index 282668af72..87d5a7e4cd 100644
--- a/m4/ax_cc_maxopt.m4
+++ b/m4/ax_cc_maxopt.m4
@@ -125,11 +125,11 @@ if test "$ac_test_CFLAGS" != "set"; then
 		    *2?6[[ad]]?:*:*:*) icc_flags="-xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Sandy-bridge
 		    *3?6[[ae]]?:*:*:*) icc_flags="-xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; #Ivy-bridge
 		    *3?6[[cf]]?:*:*:*|*4?6[[56]]?:*:*:*|*4?6[[ef]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Haswell
-		    *3?6d?:*:*:*|*4?6[[7f]]?:*:*:*|*5?66?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Broadwell
+		    *3?6d?:*:*:*|*4?6[[7f]]?:*:*:*|*5?66?:*:*:*) icc_flags=" -xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Broadwell
 		    *4?6[[de]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Skylake
 		    *5?6[[56]]?:*:*:*) icc_flags="-xCORE-AVX512 -xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Skylake-AVX512
 		    *5?67?:*:*:*) icc_flags="-xMIC-AVX512 -xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;; # Knights-Landing
-		    *8?6[[de]]?:*:*:*|*9?6[[de]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;# Kabylake 
+		    *8?6[[de]]?:*:*:*|*9?6[[de]]?:*:*:*) icc_flags="-xCORE-AVX2 -xCORE-AVX-I -xAVX -SSE4.2 -xS -xT -xB -xK" ;;# Kabylake
 		    *000?f[[346]]?:*:*:*|?f[[346]]?:*:*:*|f[[346]]?:*:*:*) icc_flags="-xSSE3 -xP -xO -xN -xW -xK" ;;
 		    *00??f??:*:*:*|??f??:*:*:*|?f??:*:*:*|f??:*:*:*) icc_flags="-xSSE2 -xN -xW -xK" ;;
                   esac ;;
@@ -139,7 +139,14 @@ if test "$ac_test_CFLAGS" != "set"; then
                     *06??f??:*:*:*|6??f??:*:*:*) icc_flags="-march=core-avx2" ;;
                     *070?f??:*:*:*|70?f??:*:*:*) icc_flags="-march=core-avx2" ;;
                                    83?f??:*:*:*) icc_flags="-march=core-avx2"
-                                                 CFLAGS="$CFLAGS -fma -ftz -fomit-frame-pointer";; # EPYC
+                                                 CFLAGS="$CFLAGS -fma -ftz -fomit-frame-pointer";; # ROME
+                                   a0?f??:*:*:*) icc_flags="-march=core-avx2"
+                                                 CFLAGS="$CFLAGS -fma -ftz -fomit-frame-pointer";; # MILAN
+                                   a1?f??:*:*:*) icc_flags="-axCORE-AVX512"
+                                                 CFLAGS="$CFLAGS -march=skylake-avx512 -fma -ftz -fomit-frame-pointer";; # GENOA
+                                   aa?f??:*:*:*) icc_flags="-axCORE-AVX512"
+                                                 CFLAGS="$CFLAGS -march=skylake-avx512 -fma -ftz -fomit-frame-pointer";; # BERGAMO
+
                   esac ;;
               esac ;;
           esac
-- 
GitLab