diff --git a/ompi/mca/coll/adapt/coll_adapt_component.c b/ompi/mca/coll/adapt/coll_adapt_component.c index 5ca59238cfb..f42b8be408e 100644 --- a/ompi/mca/coll/adapt/coll_adapt_component.c +++ b/ompi/mca/coll/adapt/coll_adapt_component.c @@ -2,6 +2,7 @@ * Copyright (c) 2014-2020 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -114,39 +115,39 @@ static int adapt_register(void) we should have a high priority */ cs->adapt_priority = 0; (void) mca_base_component_var_register(c, "priority", "Priority of the adapt coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_priority); + MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_priority); cs->adapt_verbose = ompi_coll_base_framework.framework_verbose; (void) mca_base_component_var_register(c, "verbose", "Verbose level (default set to the collective framework verbosity)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->adapt_verbose); + MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_verbose); cs->adapt_context_free_list_min = 64; (void) mca_base_component_var_register(c, "context_free_list_min", "Minimum number of segments in context free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_context_free_list_min); cs->adapt_context_free_list_max = 1024; (void) mca_base_component_var_register(c, "context_free_list_max", "Maximum number of segments in context free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + 
MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_context_free_list_max); cs->adapt_context_free_list_inc = 32; (void) mca_base_component_var_register(c, "context_free_list_inc", "Increasement number of segments in context free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &cs->adapt_context_free_list_inc); ompi_coll_adapt_ibcast_register(); ompi_coll_adapt_ireduce_register(); diff --git a/ompi/mca/coll/adapt/coll_adapt_ibcast.c b/ompi/mca/coll/adapt/coll_adapt_ibcast.c index 3c6c2bac483..094abd4a14d 100644 --- a/ompi/mca/coll/adapt/coll_adapt_ibcast.c +++ b/ompi/mca/coll/adapt/coll_adapt_ibcast.c @@ -3,6 +3,7 @@ * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2022 IBM Corporation. All rights reserved + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,8 +35,9 @@ int ompi_coll_adapt_ibcast_register(void) mca_coll_adapt_component.adapt_ibcast_algorithm = 1; mca_base_component_var_register(c, "bcast_algorithm", - "Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, + "Algorithm of broadcast, 0: tuned, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ibcast_algorithm); if( (mca_coll_adapt_component.adapt_ibcast_algorithm < 0) || (mca_coll_adapt_component.adapt_ibcast_algorithm >= OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) { @@ -45,33 +47,33 @@ int ompi_coll_adapt_ibcast_register(void) mca_coll_adapt_component.adapt_ibcast_segment_size = 0; mca_base_component_var_register(c, "bcast_segment_size", "Segment size in bytes used by default for bcast algorithms. 
Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ibcast_segment_size); mca_coll_adapt_component.adapt_ibcast_max_send_requests = 2; mca_base_component_var_register(c, "bcast_max_send_requests", "Maximum number of send requests", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ibcast_max_send_requests); mca_coll_adapt_component.adapt_ibcast_max_recv_requests = 3; mca_base_component_var_register(c, "bcast_max_recv_requests", "Maximum number of receive requests", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ibcast_max_recv_requests); mca_coll_adapt_component.adapt_ibcast_synchronous_send = true; (void) mca_base_component_var_register(c, "bcast_synchronous_send", "Whether to use synchronous send operations during setup of bcast operations", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ibcast_synchronous_send); mca_coll_adapt_component.adapt_ibcast_context_free_list = NULL; diff --git a/ompi/mca/coll/adapt/coll_adapt_ireduce.c b/ompi/mca/coll/adapt/coll_adapt_ireduce.c index 6428a90748f..2f260bf2b38 100644 --- a/ompi/mca/coll/adapt/coll_adapt_ireduce.c +++ b/ompi/mca/coll/adapt/coll_adapt_ireduce.c @@ -5,6 +5,7 @@ * Copyright (c) 2020 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2022 IBM Corporation. 
All rights reserved * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -38,8 +39,9 @@ int ompi_coll_adapt_ireduce_register(void) mca_coll_adapt_component.adapt_ireduce_algorithm = 1; mca_base_component_var_register(c, "reduce_algorithm", - "Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, + "Algorithm of reduce, 1: binomial, 2: in_order_binomial, 3: binary, 4: pipeline, 5: chain, 6: linear", + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ireduce_algorithm); if( (mca_coll_adapt_component.adapt_ireduce_algorithm < 0) || (mca_coll_adapt_component.adapt_ireduce_algorithm > OMPI_COLL_ADAPT_ALGORITHM_COUNT) ) { @@ -49,58 +51,58 @@ int ompi_coll_adapt_ireduce_register(void) mca_coll_adapt_component.adapt_ireduce_segment_size = 163740; mca_base_component_var_register(c, "reduce_segment_size", "Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 
0 bytes means no segmentation.", - MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ireduce_segment_size); mca_coll_adapt_component.adapt_ireduce_max_send_requests = 2; mca_base_component_var_register(c, "reduce_max_send_requests", "Maximum number of send requests", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ireduce_max_send_requests); mca_coll_adapt_component.adapt_ireduce_max_recv_requests = 3; mca_base_component_var_register(c, "reduce_max_recv_requests", "Maximum number of receive requests per peer", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ireduce_max_recv_requests); mca_coll_adapt_component.adapt_inbuf_free_list_min = 10; mca_base_component_var_register(c, "inbuf_free_list_min", "Minimum number of segment in inbuf free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_inbuf_free_list_min); mca_coll_adapt_component.adapt_inbuf_free_list_max = 10000; mca_base_component_var_register(c, "inbuf_free_list_max", "Maximum number of segment in inbuf free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_inbuf_free_list_max); mca_coll_adapt_component.adapt_inbuf_free_list_inc = 10; mca_base_component_var_register(c, "inbuf_free_list_inc", "Number of segments to allocate when 
growing the inbuf free list", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_inbuf_free_list_inc); mca_coll_adapt_component.adapt_ireduce_synchronous_send = true; (void) mca_base_component_var_register(c, "reduce_synchronous_send", "Whether to use synchronous send operations during setup of reduce operations", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_adapt_component.adapt_ireduce_synchronous_send); mca_coll_adapt_component.adapt_ireduce_context_free_list = NULL; diff --git a/ompi/mca/coll/base/coll_base_util.c b/ompi/mca/coll/base/coll_base_util.c index ae9010497d7..ba74aa01350 100644 --- a/ompi/mca/coll/base/coll_base_util.c +++ b/ompi/mca/coll/base/coll_base_util.c @@ -12,6 +12,9 @@ * Copyright (c) 2014-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. + * + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. + * * $COPYRIGHT$ * * Additional copyrights may follow @@ -482,6 +485,26 @@ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expecte } while (1); } +/** + * skip (consume) any spaces and tabs, then peek at the next character, leaving it in the stream. + * return non-zero if that character is a decimal digit, otherwise return 0. + */ +int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr) +{ + do { + int next = fgetc(fptr); + + if ((' ' == next) || ('\t' == next)) { + continue; /* discard space and tab.
keep everything else */ + } + + ungetc(next, fptr); /* push the peeked char back so the next read sees it */ + + return isdigit(next); /* report back whether or not next is a digit */ + + } while (1); +} + /** * There are certainly simpler implementation for this function when performance * is not a critical point. But, as this function is used during the collective diff --git a/ompi/mca/coll/base/coll_base_util.h b/ompi/mca/coll/base/coll_base_util.h index 852abcedefa..7bceaa7dcc0 100644 --- a/ompi/mca/coll/base/coll_base_util.h +++ b/ompi/mca/coll/base/coll_base_util.h @@ -11,6 +11,7 @@ * All rights reserved. * Copyright (c) 2014-2020 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -195,6 +196,7 @@ int ompi_coll_base_file_getnext_string(FILE *fptr, int *fileline, char** val); * eat the value, otherwise put it back into the file. */ int ompi_coll_base_file_peek_next_char_is(FILE *fptr, int *fileline, int expected); +int ompi_coll_base_file_peek_next_char_isdigit(FILE *fptr); /* Miscellaneous function */ const char* mca_coll_base_colltype_to_str(int collid); diff --git a/ompi/mca/coll/basic/coll_basic_component.c b/ompi/mca/coll/basic/coll_basic_component.c index a2fd0928c78..59941c75dae 100644 --- a/ompi/mca/coll/basic/coll_basic_component.c +++ b/ompi/mca/coll/basic/coll_basic_component.c @@ -13,6 +13,7 @@ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -91,16 +92,16 @@ basic_register(void) mca_coll_basic_priority = 10; (void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "priority", "Priority of the basic coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_basic_priority); mca_coll_basic_crossover = 4; (void) mca_base_component_var_register(&mca_coll_basic_component.collm_version, "crossover", "Minimum number of processes in a communicator before using the logarithmic algorithms", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &mca_coll_basic_crossover); return OMPI_SUCCESS; diff --git a/ompi/mca/coll/han/coll_han_component.c b/ompi/mca/coll/han/coll_han_component.c index 1d14baf538a..0728acba293 100644 --- a/ompi/mca/coll/han/coll_han_component.c +++ b/ompi/mca/coll/han/coll_han_component.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2022 IBM Corporation. All rights reserved * Copyright (c) 2020-2022 Bull S.A.S. All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -221,9 +222,9 @@ mca_coll_han_query_module_from_mca(mca_base_component_t* c, *storage = ompi_coll_han_available_components[mod_id].component_name; (void) mca_base_component_var_register(c, param_name, param_doc, - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, info_level, - MCA_BASE_VAR_SCOPE_READONLY, storage); + MCA_BASE_VAR_SCOPE_ALL, storage); module_name = *storage; mod_id = strtol(module_name, &endptr, 10); if( module_name == endptr ) { /* no conversion, maybe we got a module name instead */ @@ -252,22 +253,22 @@ static int han_register(void) COMPONENT_T component; (void) mca_base_component_var_register(c, "priority", "Priority of the HAN coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_priority); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_priority); cs->han_output_verbose = 0; (void) mca_base_component_var_register(c, "verbose", "Verbosity of the HAN coll component (use coll base verbosity if not set)", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_output_verbose); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_output_verbose); cs->han_bcast_segsize = 65536; (void) mca_base_component_var_register(c, "bcast_segsize", "segment size for bcast", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_bcast_segsize); cs->han_bcast_up_module = 0; (void) mca_coll_han_query_module_from_mca(c, "bcast_up_module", @@ -285,9 +286,9 @@ static int han_register(void) cs->han_reduce_segsize = 65536; (void) mca_base_component_var_register(c, "reduce_segsize", "segment size for reduce", - 
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_reduce_segsize); cs->han_reduce_up_module = 0; (void) mca_coll_han_query_module_from_mca(c, "reduce_up_module", @@ -304,9 +305,9 @@ static int han_register(void) cs->han_allreduce_segsize = 65536; (void) mca_base_component_var_register(c, "allreduce_segsize", "segment size for allreduce", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_allreduce_segsize); cs->han_allreduce_up_module = 0; (void) mca_coll_han_query_module_from_mca(c, "allreduce_up_module", @@ -361,9 +362,9 @@ static int han_register(void) "whether we need reproducible results " "(enabling this disables optimisations using topology)" "0 disable 1 enable, default 0", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3, - MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reproducible); + MCA_BASE_VAR_SCOPE_ALL, &cs->han_reproducible); /* * Han algorithms MCA parameters for each collective. 
* Shows algorithms thanks to enumerator @@ -476,9 +477,9 @@ static int han_register(void) } mca_base_component_var_register(c, param_name, param_desc, - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &(cs->mca_sub_components[coll][topo_lvl])); } } @@ -488,27 +489,27 @@ static int han_register(void) (void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version, "use_dynamic_file_rules", "Enable the dynamic selection provided via the dynamic_rules_filename MCA", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_6, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &(cs->use_dynamic_file_rules)); cs->dynamic_rules_filename = NULL; (void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version, "dynamic_rules_filename", "Configuration file containing the dynamic selection rules", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_6, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &(cs->dynamic_rules_filename)); cs->dump_dynamic_rules = false; (void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version, "dump_dynamic_rules", "Switch used to decide if we dump dynamic rules provided by configuration file", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_6, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &(cs->dump_dynamic_rules)); if((cs->dump_dynamic_rules || NULL != cs->dynamic_rules_filename) @@ -525,9 +526,9 @@ static int han_register(void) "errors printed on rank 0 " "with a 0 verbosity." 
"Useless if coll_base_verbose is 30 or more.", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_6, - MCA_BASE_VAR_SCOPE_READONLY, + MCA_BASE_VAR_SCOPE_ALL, &(cs->max_dynamic_errors)); diff --git a/ompi/mca/coll/hcoll/coll_hcoll_component.c b/ompi/mca/coll/hcoll/coll_hcoll_component.c index cee65bc87ce..9b184d9386c 100644 --- a/ompi/mca/coll/hcoll/coll_hcoll_component.c +++ b/ompi/mca/coll/hcoll/coll_hcoll_component.c @@ -3,6 +3,7 @@ * Copyright (c) 2011 Mellanox Technologies. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -107,8 +108,8 @@ static int reg_int(const char* param_name, index = mca_base_component_var_register( &mca_coll_hcoll_component.super.collm_version, param_name, param_desc, MCA_BASE_VAR_TYPE_INT, - NULL, 0, 0,OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, storage); + NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_ALL, storage); if (NULL != deprecated_param_name) { (void) mca_base_var_register_synonym(index, "ompi", "coll", "hcoll", deprecated_param_name, diff --git a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c index e56ece1d0b4..5eb8ef4317e 100644 --- a/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c +++ b/ompi/mca/coll/tuned/coll_tuned_dynamic_file.c @@ -12,6 +12,7 @@ * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -42,13 +43,24 @@ static int fileline=0; /* used for verbose error messages */ #define getnext(fptr, pval) ompi_coll_base_file_getnext_long(fptr, &fileline, pval) +#define isnext_digit(fptr) ompi_coll_base_file_peek_next_char_isdigit(fptr) /* * Reads a rule file called fname - * Builds the algorithm rule table for a max of n_collectives + * The rule file defines a set of sets of rules. The outer set is keyed on + * communicator size while the inner set is keyed on message size. When a + * communicator is constructed its size is used to look up the nested set of + * message size keyed rules. When a collective is called the message size + * determined from its call arguments is used to look up a specific rule in the + * inner set. + * + * Rules for communicator and message sizes 0 and N (where N is larger than the + * largest key you provide) can be specified to fall back to the fixed decision + * framework above and below the communicator and message size ranges of + * interest. * * If an error occurs it removes rule table and then exits with a very verbose - * error message (this stops the user using a half baked rule table + * error message. This stops the user from using a half-baked rule table. * * Returns the number of actual collectives that a rule exists for * (note 0 is NOT an error) @@ -57,9 +69,18 @@ static int fileline=0; /* used for verbose error messages */ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** rules, int n_collectives) { - long CI, NCS, CS, ALG, NMS, FANINOUT, X, MS, SS; + long NCOL = 0, /* number of collectives for which rules are provided */ + COLID = 0, /* identifies the collective type to associate the rules with */ + NCOMSIZES = 0, /* number of sets of message size rules. the key is communicator size */ + COMSIZE = 0, /* communicator size, the key identifying a specific set of message size rules. */ + NMSGSIZES = 0, /* number of message size rules in the set.
*/ + MSGSIZE = 0, /* message size, the key identifying a specific rule in the set. */ + ALG = 0, /* the collective specific algorithm to use */ + FANINOUT = 0, /* algorithm specific tuning parameter */ + SEGSIZE = 0, /* algorithm specific tuning parameter */ + MAXREQ = 0; /* algorithm specific tuning parameter */ FILE *fptr = (FILE*) NULL; - int x, ncs, nms; + int x, ncs, nms, version; ompi_coll_alg_rule_t *alg_rules = (ompi_coll_alg_rule_t*) NULL; /* complete table of rules */ @@ -103,68 +124,78 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** goto on_file_error; } - if( (getnext(fptr, &X) < 0) || (X < 0) ) { + /* consume the optional version identifier */ + if (0 == fscanf(fptr, "rule-file-version-%u", &version)) { + version = 1; + } + + /* get the number of collectives for which rules are provided in the file */ + if( (getnext(fptr, &NCOL) < 0) || (NCOL < 0) ) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of collectives in configuration file around line %d\n", fileline)); goto on_file_error; } - if (X>n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", X, n_collectives, fileline)); + if (NCOL>n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Number of collectives in configuration file %ld is greater than number of MPI collectives possible %d ??? error around line %d\n", NCOL, n_collectives, fileline)); goto on_file_error; } - for (x=0;x=n_collectives) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. Error around line %d\n", CI, n_collectives, fileline)); + if (COLID>=n_collectives) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Collective id in configuration file %ld is greater than MPI collectives possible %d. 
Error around line %d\n", COLID, n_collectives, fileline)); goto on_file_error; } - if (alg_rules[CI].alg_rule_id != CI) { - OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", CI)); + if (alg_rules[COLID].alg_rule_id != COLID) { + OPAL_OUTPUT((ompi_coll_tuned_stream, "Internal error in handling collective ID %ld\n", COLID)); goto on_file_error; } - OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", CI)); - alg_p = &alg_rules[CI]; + OPAL_OUTPUT((ompi_coll_tuned_stream, "Reading dynamic rule for collective ID %ld\n", COLID)); + alg_p = &alg_rules[COLID]; - alg_p->alg_rule_id = CI; + alg_p->alg_rule_id = COLID; alg_p->n_com_sizes = 0; alg_p->com_rules = (ompi_coll_com_rule_t *) NULL; - if( (getnext (fptr, &NCS) < 0) || (NCS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", CI, fileline)); + /* get the number of communicator sizes for which a set of rules are to be provided */ + if( (getnext (fptr, &NCOMSIZES) < 0) || (NCOMSIZES < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read count of communicators for collective ID %ld at around line %d\n", COLID, fileline)); goto on_file_error; } - OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCS, CI)); - alg_p->n_com_sizes = NCS; - alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCS, CI); + OPAL_OUTPUT((ompi_coll_tuned_stream, "Read communicator count %ld for dynamic rule for collective ID %ld\n", NCOMSIZES, COLID)); + alg_p->n_com_sizes = NCOMSIZES; + alg_p->com_rules = ompi_coll_tuned_mk_com_rules (NCOMSIZES, COLID); if (NULL == alg_p->com_rules) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate com rules for file [%s]\n", fname)); goto on_file_error; } - for (ncs=0;ncscom_rules[ncs]); - if( (getnext (fptr, &CS) < 0) || (CS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator 
size for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline)); + /* get the communicator size to associate the set of rules with */ + if( (getnext (fptr, &COMSIZE) < 0) || (COMSIZE < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read communicator size for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline)); goto on_file_error; } - com_p->mpi_comsize = CS; + com_p->mpi_comsize = COMSIZE; - if( (getnext (fptr, &NMS) < 0) || (NMS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", CI, ncs, fileline)); + /* get the number of message sizes to specify rules for. inner set size */ + if( (getnext (fptr, &NMSGSIZES) < 0) || (NMSGSIZES < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read number of message sizes for collective ID %ld com rule %d at around line %d\n", COLID, ncs, fileline)); goto on_file_error; } OPAL_OUTPUT((ompi_coll_tuned_stream, "Read message count %ld for dynamic rule for collective ID %ld and comm size %ld\n", - NMS, CI, CS)); - com_p->n_msg_sizes = NMS; - com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMS, CI, ncs, CS); + NMSGSIZES, COLID, COMSIZE)); + com_p->n_msg_sizes = NMSGSIZES; + com_p->msg_rules = ompi_coll_tuned_mk_msg_rules (NMSGSIZES, COLID, ncs, COMSIZE); if (NULL == com_p->msg_rules) { OPAL_OUTPUT((ompi_coll_tuned_stream,"Cannot allocate msg rules for file [%s]\n", fname)); goto on_file_error; @@ -172,37 +203,52 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** msg_p = com_p->msg_rules; - for (nms=0;nmsmsg_rules[nms]); - if( (getnext (fptr, &MS) < 0) || (MS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + /* read the message size to associate the rule with */ + if( (getnext (fptr, &MSGSIZE) < 0) || (MSGSIZE < 0) ) { + 
OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read message size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } - msg_p->msg_size = (size_t)MS; + msg_p->msg_size = (size_t)MSGSIZE; + /* read the collective specific algorithm identifier */ if( (getnext (fptr, &ALG) < 0) || (ALG < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target algorithm method for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } msg_p->result_alg = ALG; + /* read faninout tuning parameter. required */ if( (getnext (fptr, &FANINOUT) < 0) || (FANINOUT < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read fan in/out topo for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } msg_p->result_topo_faninout = FANINOUT; - if( (getnext (fptr, &SS) < 0) || (SS < 0) ) { - OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", CI, ncs, nms, fileline)); + /* read segsize tuning parameter. required */ + if( (getnext (fptr, &SEGSIZE) < 0) || (SEGSIZE < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read target segment size for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); goto on_file_error; } - msg_p->result_segsize = SS; + msg_p->result_segsize = SEGSIZE; + + /* read the max requests tuning parameter. 
optional */ + msg_p->result_max_requests = ompi_coll_tuned_alltoall_max_requests; + if( (version > 1) && isnext_digit(fptr) ) { + if( (getnext (fptr, &MAXREQ) < 0) || (MAXREQ < 0) ) { + OPAL_OUTPUT((ompi_coll_tuned_stream,"Could not read max requests for collective ID %ld com rule %d msg rule %d at around line %d\n", COLID, ncs, nms, fileline)); + goto on_file_error; + } + msg_p->result_max_requests = MAXREQ; + } - if (!nms && MS) { + /* check the first rule is for 0 size. look-up depends on this */ + if (!nms && MSGSIZE) { OPAL_OUTPUT((ompi_coll_tuned_stream,"All algorithms must specify a rule for message size of zero upwards always first!\n")); - OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MS, CI, ncs, nms, fileline)); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Message size was %lu for collective ID %ld com rule %d msg rule %d at around line %d\n", MSGSIZE, COLID, ncs, nms, fileline)); goto on_file_error; } @@ -219,13 +265,14 @@ int ompi_coll_tuned_read_rules_config_file (char *fname, ompi_coll_alg_rule_t** } /* comm size */ total_alg_count++; - OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", CI)); + OPAL_OUTPUT((ompi_coll_tuned_stream, "Done reading dynamic rule for collective ID %ld\n", COLID)); } /* per collective */ fclose (fptr); OPAL_OUTPUT((ompi_coll_tuned_stream,"\nConfigure file Stats\n")); + OPAL_OUTPUT((ompi_coll_tuned_stream,"Version\t\t\t\t\t: %5u\n", version)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Collectives with rules\t\t\t: %5d\n", total_alg_count)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Communicator sizes with rules\t\t: %5d\n", total_com_count)); OPAL_OUTPUT((ompi_coll_tuned_stream,"Message sizes with rules\t\t: %5d\n", total_msg_count)); diff --git a/ompi/mca/coll/ucc/coll_ucc_component.c b/ompi/mca/coll/ucc/coll_ucc_component.c index 8cc5ee0ddae..92d46859370 100644 --- a/ompi/mca/coll/ucc/coll_ucc_component.c +++ 
b/ompi/mca/coll/ucc/coll_ucc_component.c @@ -2,6 +2,7 @@ /* * Copyright (c) 2021 Mellanox Technologies. All rights reserved. * Copyright (c) 2022 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -60,24 +61,24 @@ static int mca_coll_ucc_register(void) mca_coll_ucc_component_t *cm = &mca_coll_ucc_component; mca_base_component_t *c = &cm->super.collm_version; mca_base_component_var_register(c, "priority", "Priority of the UCC coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cm->ucc_priority); + MCA_BASE_VAR_SCOPE_ALL, &cm->ucc_priority); mca_base_component_var_register(c, "verbose", "Verbose level of the UCC coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cm->ucc_verbose); + MCA_BASE_VAR_SCOPE_ALL, &cm->ucc_verbose); mca_base_component_var_register(c, "enable", "[0|1] Enable/Disable the UCC coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cm->ucc_enable); + MCA_BASE_VAR_SCOPE_ALL, &cm->ucc_enable); mca_base_component_var_register(c, "np", "Minimal communicator size for the UCC coll component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &cm->ucc_np); + MCA_BASE_VAR_SCOPE_ALL, &cm->ucc_np); mca_base_component_var_register(c, MCA_COMPILETIME_VER, "Version of the libucc library with which Open MPI was compiled", @@ -94,14 +95,14 @@ static int mca_coll_ucc_register(void) cm->cls = ""; mca_base_component_var_register(c, "cls", "Comma separated list of UCC CLS to be used for team creation", - MCA_BASE_VAR_TYPE_STRING, 
NULL, 0, 0, - OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, &cm->cls); + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_ALL, &cm->cls); cm->cts = COLL_UCC_CTS_STR; mca_base_component_var_register(c, "cts", "Comma separated list of UCC coll types to be enabled", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_READONLY, &cm->cts); + MCA_BASE_VAR_TYPE_STRING, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_6, MCA_BASE_VAR_SCOPE_ALL, &cm->cts); return OMPI_SUCCESS; }