Skip to content

Commit

Permalink
Merge branch 'ndk/machinefiles/pm-cpu-remove-pelayout-workaround' into next (PR #5971)
Browse files Browse the repository at this point in the history

Adjust pelayouts for pm-cpu/alvarez.
After the late-September Perlmutter maintenance, some HW/SW changes seem to have resolved the issue we were seeing with slower-than-expected rearranger performance (showing up as time in CPL).
There is still work to further improve the pelayouts on pm-cpu, but this PR simply removes the
CPL_PSTRID=8 work-around.

[bfb] except there are coupler namelist diffs due to different number of ranks
  • Loading branch information
ndkeen committed Oct 11, 2023
2 parents 886da34 + 0410ff3 commit ff0eec8
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 45 deletions.
42 changes: 12 additions & 30 deletions cime_config/allactive/config_pesall.xml
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@
<grid name="a%ne30np4">
<mach name="pm-cpu|alvarez">
<pes compset="JRA_ELM.+MPASSI.+MPASO.+MOSART.+SGLC.+SWAV" pesize="any">
<comment>"pm-cpu 4 nodes, 256 partition, 128x1, c8"</comment>
<comment>"pm-cpu 4 nodes, 256 partition, 128x1"</comment>
<ntasks>
<ntasks_atm>-4</ntasks_atm>
<ntasks_lnd>-4</ntasks_lnd>
Expand All @@ -274,11 +274,8 @@
<ntasks_ocn>-4</ntasks_ocn>
<ntasks_glc>-1</ntasks_glc>
<ntasks_wav>-1</ntasks_wav>
<ntasks_cpl>64</ntasks_cpl>
<ntasks_cpl>-4</ntasks_cpl>
</ntasks>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
<mach name="gcp12">
Expand Down Expand Up @@ -552,11 +549,11 @@
<grid name="a%ne120np4">
<mach name="pm-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+SWAV.*" pesize="any">
<comment>ne120-wcycl on 42 nodes 128x1c8 ~0.7 sypd</comment>
<comment>ne120-wcycl on 42 nodes 128x1 ~0.7 sypd</comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>3072</ntasks_atm>
<ntasks_cpl>384</ntasks_cpl>
<ntasks_cpl>3072</ntasks_cpl>
<ntasks_ice>3072</ntasks_ice>
<ntasks_lnd>2560</ntasks_lnd>
<ntasks_rof>512</ntasks_rof>
Expand Down Expand Up @@ -584,9 +581,6 @@
<rootpe_glc>0</rootpe_glc>
<rootpe_wav>0</rootpe_wav>
</rootpe>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
<mach name="theta">
Expand Down Expand Up @@ -1252,15 +1246,15 @@
</mach>
<mach name="pm-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+" pesize="any">
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 with MPASO on 7 nodes, 128x1 c8 </comment>
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 with MPASO on 7 nodes, 128x1 </comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>640</ntasks_atm>
<ntasks_lnd>640</ntasks_lnd>
<ntasks_rof>640</ntasks_rof>
<ntasks_ice>640</ntasks_ice>
<ntasks_ocn>256</ntasks_ocn>
<ntasks_cpl>80</ntasks_cpl>
<ntasks_cpl>640</ntasks_cpl>
</ntasks>
<nthrds>
<nthrds_atm>1</nthrds_atm>
Expand All @@ -1278,20 +1272,17 @@
<rootpe_ocn>640</rootpe_ocn>
<rootpe_cpl>0</rootpe_cpl>
</rootpe>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+" pesize="L">
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 with MPASO on 58 nodes, ~20 sypd</comment>
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 with MPASO on 58 nodes, 128x1, ~20 sypd</comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>5504</ntasks_atm>
<ntasks_lnd>5248</ntasks_lnd>
<ntasks_rof>256</ntasks_rof>
<ntasks_ice>5248</ntasks_ice>
<ntasks_ocn>1920</ntasks_ocn>
<ntasks_cpl>688</ntasks_cpl>
<ntasks_cpl>5504</ntasks_cpl>
</ntasks>
<nthrds>
<nthrds_atm>1</nthrds_atm>
Expand All @@ -1309,9 +1300,6 @@
<rootpe_ocn>5504</rootpe_ocn>
<rootpe_cpl>0</rootpe_cpl>
</rootpe>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
</grid>
Expand Down Expand Up @@ -1757,7 +1745,7 @@
</mach>
<mach name="pm-cpu|alvarez">
<pes compset="any" pesize="any">
<comment>"pm-cpu ne30np4 and ne30np4.pg2 2 nodes 1 thread, 128x1 c8"</comment>
<comment>"pm-cpu ne30np4 and ne30np4.pg2 2 nodes 1 thread, 128x1"</comment>
<ntasks>
<ntasks_atm>-2</ntasks_atm>
<ntasks_lnd>-2</ntasks_lnd>
Expand All @@ -1766,11 +1754,8 @@
<ntasks_ocn>-2</ntasks_ocn>
<ntasks_glc>-2</ntasks_glc>
<ntasks_wav>-2</ntasks_wav>
<ntasks_cpl>32</ntasks_cpl>
<ntasks_cpl>-2</ntasks_cpl>
</ntasks>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
<mach name="crusher-scream-gpu">
Expand Down Expand Up @@ -2072,14 +2057,14 @@
</mach>
<mach name="pm-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+MPASO.+MOSART.+" pesize="any">
<comment> 8 nodes, 128x1 c8</comment>
<comment> 8 nodes, 128x1</comment>
<ntasks>
<ntasks_atm>640</ntasks_atm>
<ntasks_lnd>640</ntasks_lnd>
<ntasks_rof>640</ntasks_rof>
<ntasks_ice>640</ntasks_ice>
<ntasks_ocn>384</ntasks_ocn>
<ntasks_cpl>80</ntasks_cpl>
<ntasks_cpl>640</ntasks_cpl>
</ntasks>
<rootpe>
<rootpe_atm>0</rootpe_atm>
Expand All @@ -2097,9 +2082,6 @@
<nthrds_ocn>1</nthrds_ocn>
<nthrds_cpl>1</nthrds_cpl>
</nthrds>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
</grid>
Expand Down
14 changes: 4 additions & 10 deletions components/eam/cime_config/config_pes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -810,19 +810,16 @@
<!--Pes setting: grid is a%ne30np4_l%ne30np4_oi%oEC60to30v3_r%r05_g%null_w%null_z%null_m%oEC60to30v3
Pes setting: compset is 2010_EAM%CMIP6_ELM%SPBC_MPASSI%PRES_DOCN%DOM_MOSART_SGLC_SWAV_SIAC_SESP -->
<pes compset=".*EAM.+ELM.+DOCN" pesize="any">
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 without MPASO on 4 nodes, 128x1 c8 </comment>
<comment> -compset A_WCYCL* -res ne30pg2_oECv3 without MPASO on 4 nodes, 128x1 </comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>512</ntasks_atm>
<ntasks_lnd>512</ntasks_lnd>
<ntasks_rof>512</ntasks_rof>
<ntasks_ice>512</ntasks_ice>
<ntasks_ocn>512</ntasks_ocn>
<ntasks_cpl>64</ntasks_cpl>
<ntasks_cpl>512</ntasks_cpl>
</ntasks>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
</grid>
Expand Down Expand Up @@ -1113,7 +1110,7 @@
<grid name="a%ne120np4">
<mach name="pm-cpu|alvarez">
<pes compset=".*EAM.+ELM.+MPASSI.+DOCN.+SGLC.+SWAV.*" pesize="any">
<comment>pm-cpu ne120pg2 F-compset with MPASSI on 43 nodes 128x1c8 1.3 sypd</comment>
<comment>pm-cpu ne120pg2 F-compset with MPASSI on 43 nodes 128x1 ~1 sypd</comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>5504</ntasks_atm>
Expand All @@ -1123,7 +1120,7 @@
<ntasks_ocn>5504</ntasks_ocn>
<ntasks_glc>64</ntasks_glc>
<ntasks_wav>64</ntasks_wav>
<ntasks_cpl>688</ntasks_cpl>
<ntasks_cpl>5504</ntasks_cpl>
</ntasks>
<nthrds>
<nthrds_atm>1</nthrds_atm>
Expand All @@ -1135,9 +1132,6 @@
<nthrds_wav>1</nthrds_wav>
<nthrds_cpl>1</nthrds_cpl>
</nthrds>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
<mach name="theta">
Expand Down
7 changes: 2 additions & 5 deletions components/mpas-ocean/cime_config/config_pes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@
</mach>
<mach name="pm-cpu|alvarez">
<pes compset="DATM.+MPASO.+SWAV" pesize="any">
<comment>mpas-ocean: SO RRM, compset=DATM+MPASO, 8 nodes, 128x1c8 ~3.3 sypd</comment>
<comment>mpas-ocean: SO RRM, compset=DATM+MPASO, 8 nodes, 128x1 ~3.3 sypd</comment>
<MAX_MPITASKS_PER_NODE>128</MAX_MPITASKS_PER_NODE>
<ntasks>
<ntasks_atm>1024</ntasks_atm>
Expand All @@ -437,7 +437,7 @@
<ntasks_ice>1024</ntasks_ice>
<ntasks_ocn>1024</ntasks_ocn>
<ntasks_glc>1</ntasks_glc>
<ntasks_cpl>128</ntasks_cpl>
<ntasks_cpl>1024</ntasks_cpl>
</ntasks>
<nthrds>
<nthrds_atm>1</nthrds_atm>
Expand All @@ -458,9 +458,6 @@
<rootpe_glc>0</rootpe_glc>
<rootpe_cpl>0</rootpe_cpl>
</rootpe>
<pstrid>
<pstrid_cpl>8</pstrid_cpl>
</pstrid>
</pes>
</mach>
</grid>
Expand Down

0 comments on commit ff0eec8

Please sign in to comment.