From 6072be8923fce19c38f3bc89b2bc1fa49f684a26 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 08:08:12 -0400 Subject: [PATCH 01/20] Deprecate blocks for Collect, Broadcast, update def apireturnvalues --- content/shmem_broadcast.tex | 5 +++++ content/shmem_collect.tex | 8 ++++++++ content/shmem_malloc_hints.tex | 4 ---- utils/defs.tex | 3 +-- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index a172a12e7..3e4b376ed 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -83,6 +83,7 @@ the team. \end{itemize} +\begin{DeprecateBlock} For active-set-based broadcasts: \begin{itemize} \item The \dest{} object is updated on all \acp{PE} other than the @@ -128,13 +129,17 @@ \end{itemize} \item The \source{} data object may be safely reused. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ For team-based broadcasts, zero on successful local completion; otherwise, nonzero. +\begin{DeprecateBlock} For active-set-based broadcasts, none. +\end{DeprecateBlock} + } \apinotes{ diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 5430abcfc..897bdcb3f 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -90,6 +90,7 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, @@ -108,6 +109,7 @@ \item For active-set-based collective routines, the values in the \VAR{pSync} array are restored to the original values. 
\end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ @@ -115,9 +117,15 @@ } \apinotes{ +\begin{DeprecateBlock} The collective routines operate on active \ac{PE} sets that have a non-power-of-two \VAR{PE\_size} with some performance degradation. They operate with no performance degradation when \VAR{nelems} is a non-power-of-two value. +\end{DeprecateBlock} + The collective routines that operate on teams containing a + non-power-of-two number of \acp{PE} do so with some performance degradation. They operate + with no performance degradation when \VAR{nelems} is a non-power-of-two value. + } \begin{apiexamples} diff --git a/content/shmem_malloc_hints.tex b/content/shmem_malloc_hints.tex index f840cb85a..174e143a4 100644 --- a/content/shmem_malloc_hints.tex +++ b/content/shmem_malloc_hints.tex @@ -57,19 +57,15 @@ \tabularnewline \hline \endhead %% - \newline \CONST{0} & - \newline Behavior same as \FUNC{shmem\_malloc} \tabularnewline \hline \LibConstDecl{SHMEM\_MALLOC\_ATOMICS\_REMOTE} & - \newline Memory used for \VAR{atomic} operations \tabularnewline \hline \LibConstDecl{SHMEM\_MALLOC\_SIGNAL\_REMOTE} & - \newline Memory used for \VAR{signal} operations \tabularnewline \hline diff --git a/utils/defs.tex b/utils/defs.tex index 4496b6fe0..5fd1e6ac7 100644 --- a/utils/defs.tex +++ b/utils/defs.tex @@ -362,8 +362,7 @@ \hfill \item[Return Values] \hfill \\ #1 -\\ -\hfill +\hfill \\ } \newcommand{\apitablerow}[2]{ From 14c87b8de8114a21f4ac57c907e3f4889b2201a2 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Wed, 20 Mar 2024 11:42:13 -0400 Subject: [PATCH 02/20] Deprecate active-set language in Collectives, missing Reductions --- content/shmem_alltoall.tex | 38 ++++++++++++++++++-------- content/shmem_alltoalls.tex | 4 +-- content/shmem_broadcast.tex | 53 ++++++++++++++++++++++++------------- content/shmem_collect.tex | 23 +++++++++++++--- content/shmem_sync.tex | 19 ++++++++++--- 5 files changed, 99 insertions(+), 38 deletions(-) diff --git a/content/shmem_alltoall.tex
b/content/shmem_alltoall.tex index 188e28759..c37823d8b 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ @@ -100,6 +100,21 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, + the following conditions must be ensured: + \begin{itemize} + \item The \VAR{dest} data object on all \acp{PE} in the team is + ready to accept the \FUNC{shmem\_alltoall} data. + \end{itemize} + + Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for + the local PE: + \begin{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \end{itemize} + +\begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -117,22 +132,23 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the active set is - ready to accept the \FUNC{shmem\_alltoall} data.
- \item For active-set-based routines, the \VAR{pSync} array - on all \acp{PE} in the active set is not still in use from a prior call - to a \FUNC{shmem\_alltoall} routine. + \item The \VAR{dest} data object on all \acp{PE} in the active set is + ready to accept the \FUNC{shmem\_alltoall} data. + \item For active-set-based routines, the \VAR{pSync} array + on all \acp{PE} in the active set is not still in use from a prior call + to a \FUNC{shmem\_alltoall} routine. \end{itemize} Otherwise, the behavior is undefined. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and - the data has been copied out of the \VAR{source} data object. - \item For active-set-based routines, - the values in the \VAR{pSync} array are restored to the original values. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. + \item For active-set-based routines, + the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} } \apireturnvalues{ diff --git a/content/shmem_alltoalls.tex b/content/shmem_alltoalls.tex index e371b8cf9..d1bd7d1f9 100644 --- a/content/shmem_alltoalls.tex +++ b/content/shmem_alltoalls.tex @@ -35,10 +35,10 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - active set. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} - elements of data for each \ac{PE} in the active set, ordered according to + elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to destination \ac{PE}. 
The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{dst}{The stride between consecutive elements of the \dest{} diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 3e4b376ed..5aec7b9dc 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -45,7 +45,7 @@ respectively. } \apiargument{IN}{PE\_root}{Zero-based ordinal of the \ac{PE}, with respect to - the team or active set, from which the data is copied.} + the calling PEs, from which the data is copied.} \begin{DeprecateBlock} @@ -61,8 +61,7 @@ \end{apiarguments} \apidescription{ - \openshmem broadcast routines are collective routines over an active set or - valid \openshmem team. + \openshmem team-based broadcast routines are collective routines over a valid \openshmem team. They copy the \source{} data object on the \ac{PE} specified by \VAR{PE\_root} to the \dest{} data object on the \acp{PE} participating in the collective operation. @@ -75,6 +74,9 @@ \item The \dest{} object is updated on all \acp{PE}. \item All \acp{PE} in the \VAR{team} argument must participate in the operation. + \item Only \acp{PE} in the team may call the routine. If a + \ac{PE} not in the team calls a team-based + collective routine, the behavior is undefined. \item If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. \item \ac{PE} numbering is relative to the team. The specified @@ -82,12 +84,34 @@ between \CONST{0} and \VAR{N$-$1}, where \VAR{N} is the size of the team. \end{itemize} + + Before any \ac{PE} calls a broadcast routine, the following + conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \end{itemize} + Otherwise, the behavior is undefined. 
+ + Upon return from a team-based broadcast routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} + \openshmem active-set broadcast routines are collective routines over an active set. + They copy the \source{} data object on the \ac{PE} specified by + \VAR{PE\_root} to the \dest{} data object on the \acp{PE} + participating in the collective operation. + The same \dest{} and \source{} data objects and the same value of + \VAR{PE\_root} must be passed by all \acp{PE} participating in the + collective operation. + For active-set-based broadcasts: \begin{itemize} - \item The \dest{} object is updated on all \acp{PE} other than the - root \ac{PE}. + \item The \VAR{dest} object is updated on all PEs other than the root PE. \item All \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet must participate in the operation. @@ -103,31 +127,24 @@ in the active set. \end{itemize} - Before any \ac{PE} calls a broadcast routine, the following + Before any \ac{PE} calls a active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} \item The \dest{} array on all \acp{PE} participating in the broadcast is ready to accept the broadcast data. - \item For active-set-based broadcasts, the - \VAR{pSync} array on all \acp{PE} in the + \item The \VAR{pSync} array on all \acp{PE} in the active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. - Upon return from a broadcast routine, the following are true for the local + Upon return from a active-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item For team-based broadcasts, the \dest{} data object is - updated. 
- \item For active-set-based broadcasts: - \begin{itemize} - \item If the current \ac{PE} is not the root \ac{PE}, the - \dest{} data object is updated. + \item If the current PE is not the root PE, the \dest{} data object is updated. + \item The \source{} data object may be safely reused. \item The values in the \VAR{pSync} array are restored to the original values. - \end{itemize} - \item The \source{} data object may be safely reused. \end{itemize} \end{DeprecateBlock} } diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 897bdcb3f..63814c79d 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -70,9 +70,7 @@ in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \begin{itemize} \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} @@ -91,6 +89,25 @@ otherwise invalid, the behavior is undefined. \begin{DeprecateBlock} + \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective + operation to concatenate \VAR{nelems} + data items from the \source{} array into the + \dest{} array, over an \openshmem active set + in processor number order. The resultant \dest{} array contains the contribution from + \acp{PE} as follows: + \begin{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} + + The collected result is written to the \dest{} array for all \acp{PE} + that participate in the operation. The same \dest{} and \source{} + arrays must be passed by all \acp{PE} that participate in the operation. 
+ + The \FUNC{fcollect} routines require that \VAR{nelems} be the same value in all + participating \acp{PE}, while the \FUNC{collect} routines allow \VAR{nelems} to + vary from \ac{PE} to \ac{PE}. + Active-set-based collective routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. As with all active-set-based collective routines, diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 6e41ee825..8ba9b0422 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -38,12 +38,12 @@ \apidescription{ \FUNC{shmem\_sync} is a collective synchronization routine over an - existing \openshmem team or active set. + existing \openshmem team. The routine registers the arrival of a \ac{PE} at a synchronization point in the program. This is a fast mechanism for synchronizing all \acp{PE} that participate in this collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the - specified team or active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + specified team have called \FUNC{shmem\_sync}. In a multithreaded \openshmem program, only the calling thread is blocked. Team-based sync routines operate over all \acp{PE} in the provided team argument. All @@ -51,6 +51,15 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. +\begin{DeprecateBlock} + \FUNC{shmem\_sync} is a collective synchronization routine over an active set. + + The routine registers the arrival of a \ac{PE} at a synchronization point in the program. + This is a fast mechanism for synchronizing all \acp{PE} that participate in this + collective call. The routine blocks the calling \ac{PE} until all \acp{PE} in the + active set have called \FUNC{shmem\_sync}. In a multithreaded \openshmem + program, only the calling thread is blocked. 
+ Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -64,12 +73,14 @@ \VAR{PE\_size} must be equal on all \acp{PE} in the active set. The same work array must be passed in \VAR{pSync} to all \acp{PE} in the active set. + The same \VAR{pSync} array may be reused on consecutive calls to + \FUNC{shmem\_sync} if the same active set is used. +\end{DeprecateBlock} + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only ensures completion and visibility of previously issued memory stores and does not ensure completion of remote memory updates issued via \openshmem routines. - The same \VAR{pSync} array may be reused on consecutive calls to - \FUNC{shmem\_sync} if the same active set is used. } \apireturnvalues{ From 8997a4e836d92d4581d488693956a7fa660fbe38 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 28 Mar 2024 09:44:13 -0400 Subject: [PATCH 03/20] Reductions, Programming Model, strided teams active set language deprecated/removed --- content/collective_intro.tex | 7 +++-- content/programming_model_overview.tex | 2 +- content/shmem_collect.tex | 4 +-- content/shmem_reductions.tex | 40 ++++++++++++++++++++++++-- content/shmem_sync.tex | 13 ++++++--- content/shmem_team_split_strided.tex | 6 ++-- 6 files changed, 56 insertions(+), 16 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index a2752c613..c3485e532 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -1,7 +1,7 @@ \emph{Collective routines} are defined as coordinated communication or synchronization operations performed by a group of \acp{PE}.
-\openshmem provides three types of collective routines: +\openshmem provides four types of collective routines: \begin{enumerate} \item Collective routines that operate on teams use a team handle parameter to determine @@ -11,9 +11,12 @@ \begin{DeprecateBlock} \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. + +\item Collective routines that do not accept an active set + parameters and, as required, the default context. \end{DeprecateBlock} -\item Collective routines that accept neither team nor active set +\item Collective routines that do not accept team parameters, which implicitly operate on the world team and, as required, the default context. \end{enumerate} diff --git a/content/programming_model_overview.tex b/content/programming_model_overview.tex index a76c99de2..5daac5404 100644 --- a/content/programming_model_overview.tex +++ b/content/programming_model_overview.tex @@ -144,7 +144,7 @@ data object on another symmetric data object. \item \OPR{All-to-All}: All \acp{PE} participating in the routine exchange a fixed amount of contiguous or strided data with all other \acp{PE} - in the active set. + in the team. \end{enumerate} \item \textbf{Mutual Exclusion} diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 63814c79d..68b3e614f 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -66,8 +66,8 @@ \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} data items from the \source{} array into the - \dest{} array, over an \openshmem team or active set - in processor number order. The resultant \dest{} array contains the contribution from + \dest{} array, over an \openshmem team in processor number order. 
+ The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index ff933b35e..be5543c26 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,12 +251,14 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} + arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} \begin{DeprecateBlock} +\apiargument{IN}{nreduce}{In active-set based \ac{API} calls, + \VAR{nreduce} must be of type integer.} \apiargument{IN}{PE\_start}{The lowest \ac{PE} number of the active set of \acp{PE}.} \apiargument{IN}{logPE\_stride}{The log (base 2) of the stride between consecutive @@ -273,7 +275,7 @@ \subsubsubsection{PROD} \end{apiarguments} \apidescription{ - \openshmem reduction routines are collective routines over an active set or + \openshmem reduction routines are collective routines over an existing \openshmem team that compute one or more reductions across symmetric arrays on multiple \acp{PE}. A reduction performs an associative binary routine across a set of values. @@ -295,6 +297,37 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. 
+ Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + \begin{itemize} + \item The \dest{} array on all \acp{PE} participating in the reduction + is ready to accept the results of the \OPR{reduction}. + \end{itemize} + Otherwise, the behavior is undefined. + + Upon return from a reduction routine, the following are true for the local + \ac{PE}: + \begin{itemize} + \item The \dest{} array is updated and the \source{} array may be safely reused. + \end{itemize} + +\begin{DeprecateBlock} + \openshmem reduction routines are collective routines over an active set + that compute one or more reductions across symmetric + arrays on multiple \acp{PE}. A reduction performs an associative binary routine + across a set of values. + + The \VAR{nreduce} argument determines the number of separate reductions to + perform. The \source{} array on all \acp{PE} participating in the reduction + provides one element for each reduction. The results of the reductions are placed in the + \dest{} array on all \acp{PE} participating in the reduction. + + The same \source{} and \dest{} arrays must be passed by all PEs that + participate in the collective. + The \source{} and \dest{} arguments must either be the same symmetric + address, or two different symmetric addresses corresponding to buffers that + do not overlap in memory. That is, they must be completely overlapping (sometimes referred to as an ``in place'' reduction) or + completely disjoint. + Active-set-based sync routines operate over all \acp{PE} in the active set defined by the \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet. @@ -327,6 +360,7 @@ \subsubsubsection{PROD} \item If using active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} +\end{DeprecateBlock} The complex-typed interfaces are only provided for sum and product reductions. 
When the \Cstd translation environment does not support complex types diff --git a/content/shmem_sync.tex b/content/shmem_sync.tex index 8ba9b0422..91a2ce61c 100644 --- a/content/shmem_sync.tex +++ b/content/shmem_sync.tex @@ -1,7 +1,11 @@ \apisummary{ Registers the arrival of a \ac{PE} at a synchronization point. This routine does not return until all other \acp{PE} in a given OpenSHMEM team - or active set arrive at this synchronization point. + arrive at this synchronization point. +\begin{DeprecateBlock} + Registers the arrival of a \ac{PE} at a synchronization point. + This routine does not return until all other \acp{PE} in a given OpenSHMEM active set arrive at this synchronization point. +\end{DeprecateBlock} } \begin{apidefinition} @@ -51,6 +55,10 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only + ensures completion and visibility of previously issued memory stores and does not ensure + completion of remote memory updates issued via \openshmem routines. + \begin{DeprecateBlock} \FUNC{shmem\_sync} is a collective synchronization routine over an active set. @@ -77,9 +85,6 @@ \FUNC{shmem\_sync} if the same active set is used. \end{DeprecateBlock} - In contrast with the \FUNC{shmem\_barrier} routine, \FUNC{shmem\_sync} only - ensures completion and visibility of previously issued memory stores and does not ensure - completion of remote memory updates issued via \openshmem routines. 
} diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 990f62209..0385bd252 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -97,10 +97,8 @@ \apinotes{ The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument, whereas the \VAR{logPE\_stride} argument to the - active set collective operations only permits strides that are a power of two. - Arbitrary strides allow a greater number of PE subsets to be expressed - and can support a broader range of usage models. + \VAR{stride} argument. Arbitrary strides allow a greater number of + PE subsets to be expressed and can support a broader range of usage models. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. From 9fa187b332f6538d61416062868a7ec203ebc0a3 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 28 Mar 2024 09:46:40 -0400 Subject: [PATCH 04/20] Indent in shmem_alltoall --- content/shmem_alltoall.tex | 2 ++ 1 file changed, 2 insertions(+) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index c37823d8b..4b4b92ebf 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -131,6 +131,7 @@ Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following conditions must be ensured: + \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the active set is ready to accept the \FUNC{shmem\_alltoall} data. @@ -138,6 +139,7 @@ on all \acp{PE} in the active set is not still in use from a prior call to a \FUNC{shmem\_alltoall} routine. \end{itemize} + Otherwise, the behavior is undefined. 
Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for From 111a29cfae30585aeb5887b8ee9973450307c663 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:02 -0400 Subject: [PATCH 05/20] Update content/collective_intro.tex Typo Co-authored-by: David Ozog --- content/collective_intro.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index c3485e532..655885160 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -12,7 +12,7 @@ \item Collective routines that operate on active sets use a set of parameters to determine which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept an active set +\item Collective routines that do not accept active set parameters and, as required, the default context. \end{DeprecateBlock} From dda26c6d07a1a2ea018348af4451750c5f6aa3bf Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:10:23 -0400 Subject: [PATCH 06/20] Update content/shmem_broadcast.tex White Space Co-authored-by: David Ozog --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 5aec7b9dc..2470ec39b 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -136,7 +136,7 @@ active set is not still in use from a prior call to an \openshmem collective routine. \end{itemize} - Otherwise, the behavior is undefined. + Otherwise, the behavior is undefined. 
Upon return from a active-based broadcast routine, the following are true for the local \ac{PE}: From 45e710bb52af78b75098df7a240621332ff741fd Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Apr 2024 13:12:21 -0400 Subject: [PATCH 07/20] Update shmem_reductions.tex Typo, uppercase --- content/shmem_reductions.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index be5543c26..79f0b42a1 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -251,8 +251,8 @@ \subsubsubsection{PROD} \apiargument{IN}{source}{Symmetric address of an array, of length \VAR{nreduce} elements, that contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} -\apiargument{IN}{nreduce}{the number of elements in the \dest{} and \source{} - arrays. in teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. +\apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} + arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. In deprecated active-set based \ac{API} calls, \VAR{nreduce} must be of type integer.} From 41a0024b27aa3593848a5b942612f43274910bb8 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 11:53:45 -0400 Subject: [PATCH 08/20] Update shmem_team_split_strided API Note, arbitrary to any positive integer. --- content/shmem_team_split_strided.tex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 0385bd252..d22a5ffd1 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -96,9 +96,8 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation uses an arbitrary - \VAR{stride} argument.
Arbitrary strides allow a greater number of - PE subsets to be expressed and can support a broader range of usage models. + The \FUNC{shmem\_team\_split\_strided} operation can take any positive integer value + \VAR{stride} argument. See the description of team handles and predefined teams in Section~\ref{subsec:team} for more information about team handle semantics and usage. From a6532ef7ae0eb6bf4eb293bb47bb33f4d7dc5d85 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:34:06 -0400 Subject: [PATCH 09/20] Fix Whitespace in shmem_alltoall --- content/shmem_alltoall.tex | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4b4b92ebf..bcd531562 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -42,10 +42,10 @@ destination \ac{PE}. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nelems}{ - The number of elements to exchange for each \ac{PE}. - For \FUNC{shmem\_alltoallmem}, elements are bytes; - for \FUNC{shmem\_alltoall\{32,64\}}, elements are 4 or 8 bytes, - respectively. + The number of elements to exchange for each \ac{PE}. + For \FUNC{shmem\_alltoallmem}, elements are bytes; + for \FUNC{shmem\_alltoall\{32,64\}}, elements are 4 or 8 bytes, + respectively. } \begin{DeprecateBlock} @@ -105,14 +105,14 @@ \begin{itemize} \item The \VAR{dest} data object on all \acp{PE} in the team is ready to accept the \FUNC{shmem\_alltoall} data. - \end{itemize} + \end{itemize} Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. - \end{itemize} + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. 
+ \end{itemize} \begin{DeprecateBlock} Active-set-based collective routines operate over all \acp{PE} in the active set @@ -145,8 +145,8 @@ Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: \begin{itemize} - \item Its \VAR{dest} symmetric data object is completely updated and the - data has been copied out of the source data object. + \item Its \VAR{dest} symmetric data object is completely updated and the + data has been copied out of the source data object. \item For active-set-based routines, the values in the \VAR{pSync} array are restored to the original values. \end{itemize} From c3b23e5649ae9201c9b68b3fe7e77f726469f7af Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:37:34 -0400 Subject: [PATCH 10/20] Fix whitespace shmem_broadcast --- content/shmem_broadcast.tex | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 2470ec39b..49abd50be 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -96,9 +96,9 @@ Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} - \item The \dest{} data object is updated. - \item The \source{} data object may be safely reused. - \end{itemize} + \item The \dest{} data object is updated. + \item The \source{} data object may be safely reused. + \end{itemize} \begin{DeprecateBlock} \openshmem active-set broadcast routines are collective routines over an active set. @@ -111,30 +111,30 @@ For active-set-based broadcasts: \begin{itemize} - \item The \VAR{dest} object is updated on all PEs other than the root PE. - \item All \acp{PE} in the active set defined by the - \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet - must participate in the operation. - \item Only \acp{PE} in the active set may call the routine. 
If a - \ac{PE} not in the active set calls an active-set-based + \item The \VAR{dest} object is updated on all PEs other than the root PE. + \item All \acp{PE} in the active set defined by the + \VAR{PE\_start}, \VAR{logPE\_stride}, \VAR{PE\_size} triplet + must participate in the operation. + \item Only \acp{PE} in the active set may call the routine. If a + \ac{PE} not in the active set calls an active-set-based collective routine, the behavior is undefined. - \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, + \item The values of arguments \VAR{PE\_root}, \VAR{PE\_start}, \VAR{logPE\_stride}, and \VAR{PE\_size} must be the same value on all \acp{PE} in the active set. - \item The value of \VAR{PE\_root} must be between \CONST{0} and + \item The value of \VAR{PE\_root} must be between \CONST{0} and \VAR{PE\_size $-$ 1}. - \item The same \VAR{pSync} work array must be passed by all \acp{PE} + \item The same \VAR{pSync} work array must be passed by all \acp{PE} in the active set. \end{itemize} Before any \ac{PE} calls a active-set-based broadcast routine, the following conditions must be ensured: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \item The \VAR{pSync} array on all \acp{PE} in the - active set is not still in use from a prior call to an \openshmem - collective routine. + \item The \dest{} array on all \acp{PE} participating in the broadcast + is ready to accept the broadcast data. + \item The \VAR{pSync} array on all \acp{PE} in the + active set is not still in use from a prior call to an \openshmem + collective routine. \end{itemize} Otherwise, the behavior is undefined. 
From a3b9ea7ec036c17ae5b98c55dffafe5f7fe3976c Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:38:52 -0400 Subject: [PATCH 11/20] Edit Whitespace in shmem_collect --- content/shmem_collect.tex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 68b3e614f..d14d8f17b 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -67,12 +67,12 @@ operation to concatenate \VAR{nelems} data items from the \source{} array into the \dest{} array, over an \openshmem team in processor number order. - The resultant \dest{} array contains the contribution from + The resultant \dest{} array contains the contribution from \acp{PE} as follows: - \begin{itemize} - \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the - contribution from \ac{PE} \CONST{1} in the team, and so on. + \begin{itemize} + \item For a team, the data from \ac{PE} number \CONST{0} in the team is first, then the + contribution from \ac{PE} \CONST{1} in the team, and so on. \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} @@ -96,9 +96,9 @@ in processor number order. The resultant \dest{} array contains the contribution from \acp{PE} as follows: \begin{itemize} - \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the - contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. - \end{itemize} + \item For an active set, the data from \ac{PE} \VAR{PE\_start} is first, then the + contribution from \ac{PE} \VAR{PE\_start} + \VAR{PE\_stride} second, and so on. + \end{itemize} The collected result is written to the \dest{} array for all \acp{PE} that participate in the operation. 
The same \dest{} and \source{} From a621dd4c0216ed2bcc6d44d9c27a8eef31dd2da9 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:40:34 -0400 Subject: [PATCH 12/20] Fix Whitespace in collective_intro --- content/collective_intro.tex | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/content/collective_intro.tex b/content/collective_intro.tex index 655885160..56ba76f18 100644 --- a/content/collective_intro.tex +++ b/content/collective_intro.tex @@ -4,21 +4,21 @@ \openshmem provides four types of collective routines: \begin{enumerate} -\item Collective routines that operate on teams use a team handle parameter to determine - which \acp{PE} will participate in the routine, and use resources encapsulated by the team object - to perform operations. See Section~\ref{subsec:team} for details on team management. + \item Collective routines that operate on teams use a team handle parameter to determine + which \acp{PE} will participate in the routine, and use resources encapsulated by the team object + to perform operations. See Section~\ref{subsec:team} for details on team management. -\begin{DeprecateBlock} -\item Collective routines that operate on active sets use a set of parameters to determine - which \acp{PE} will participate and what resources are used to perform operations. + \begin{DeprecateBlock} + \item Collective routines that operate on active sets use a set of parameters to determine + which \acp{PE} will participate and what resources are used to perform operations. -\item Collective routines that do not accept active set - parameters and, as required, the default context. -\end{DeprecateBlock} + \item Collective routines that do not accept active set + parameters and, as required, the default context. + \end{DeprecateBlock} -\item Collective routines that do not accept team - parameters, which implicitly operate on the world team and, as - required, the default context. 
+ \item Collective routines that do not accept team + parameters, which implicitly operate on the world team and, as + required, the default context. \end{enumerate} Concurrent accesses to symmetric memory by an \openshmem collective From a2d9daad1f2b0f558869b6f933f96107f1f4766e Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 26 Jul 2024 13:59:23 -0400 Subject: [PATCH 13/20] Fix Typo in shmem_alltoall --- content/shmem_alltoall.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index bcd531562..4e145c266 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -35,7 +35,7 @@ \apiargument{OUT}{dest}{Symmetric address of a data object large enough to receive the combined total of \VAR{nelems} elements from each \ac{PE} in the - particpating \acp{PE}. + participating \acp{PE}. The type of \dest{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{source}{Symmetric address of a data object that contains \VAR{nelems} elements of data for each \ac{PE} in the participating \acp{PE}, ordered according to From fd98952fc9701cd9c9cb0ec46129f0776521980d Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Thu, 29 Aug 2024 14:58:02 -0400 Subject: [PATCH 14/20] Update content/shmem_team_split_strided.tex Co-authored-by: David Ozog --- content/shmem_team_split_strided.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_team_split_strided.tex b/content/shmem_team_split_strided.tex index 59decede7..26616d396 100644 --- a/content/shmem_team_split_strided.tex +++ b/content/shmem_team_split_strided.tex @@ -101,7 +101,7 @@ } \apinotes{ - The \FUNC{shmem\_team\_split\_strided} operation can take any positive integer value + The \FUNC{shmem\_team\_split\_strided} operation can take any integer value \VAR{stride} argument. 
See the description of team handles and predefined teams in From a1e23bd1cfaf9d141545bb7794838d31439aff35 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Thu, 29 Aug 2024 16:43:36 -0400 Subject: [PATCH 15/20] scan: 488 section committee edits (nelems/overlap) --- content/shmem_scan.tex | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 618a51a05..69f05bc96 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -6,16 +6,16 @@ %% C11 \begin{C11synopsis} -int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{C11synopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation as specified by Table \ref{teamreducetypes}. %% C/C++ \begin{Csynopsis} -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); -int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nreduce); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_inscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); +int @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_sum\_exscan}@(shmem_team_t team, TYPE *dest, const TYPE *source, size_t nelems); \end{Csynopsis} where \TYPE{} is one of the integer, real, or complex types supported for the SUM operation and has a corresponding \TYPENAME{} as specified @@ -26,17 +26,17 @@ The team over which to perform the operation. 
} \apiargument{OUT}{dest}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - to receive the result of the scan routines. The type of + Symmetric address of an array, of length \VAR{nelems} elements, + to receive the result of the scan operation. The type of \dest{} should match that implied in the SYNOPSIS section. } \apiargument{IN}{source}{ - Symmetric address of an array, of length \VAR{nreduce} elements, - that contains one element for each separate scan routine. + Symmetric address of an array, of length \VAR{nelems} elements, + that contains one element for each separate scan operation. The type of \source{} should match that implied in the SYNOPSIS section. } - \apiargument{IN}{nreduce}{ + \apiargument{IN}{nelems}{ The number of elements in the \dest{} and \source{} arrays. } \end{apiarguments} @@ -49,7 +49,7 @@ multiple \acp{PE}. The scan operations are performed with the SUM operator. - The \VAR{nreduce} argument determines the number of separate scan + The \VAR{nelems} argument determines the number of separate scan operations to perform. The \source{} array on all \acp{PE} participating in the operation provides one element for each scan. The results of the scan operations are placed in the \dest{} array @@ -75,10 +75,14 @@ \end{cases} \end{equation*} + + The same \source{} and \dest{} arrays must be passed by all PEs that + participate in the collective. The \source{} and \dest{} arguments must either be the same symmetric address, or two different symmetric addresses - corresponding to buffers that do not overlap in memory. That is, - they must be completely overlapping or completely disjoint. + corresponding to buffers that do not overlap in memory. + That is, they must be completely overlapping (sometimes referred to as an + ``in place'' reduction) or completely disjoint. Team-based scan routines operate over all \acp{PE} in the provided team argument. 
All \acp{PE} in the provided team must participate in From 129573e4ab8b6e2f7975eef39044a1aa5922b19e Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 10:24:23 -0400 Subject: [PATCH 16/20] Update content/shmem_broadcast.tex typo Co-authored-by: Muhammad Awad --- content/shmem_broadcast.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 49abd50be..d67c2fb0f 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -138,7 +138,7 @@ \end{itemize} Otherwise, the behavior is undefined. - Upon return from a active-based broadcast routine, the following are true for the local + Upon return from an active-set-based broadcast routine, the following are true for the local \ac{PE}: \begin{itemize} \item If the current PE is not the root PE, the \dest{} data object is updated. From 421cc8bc713a12c1ea3afbccd764437304d391d6 Mon Sep 17 00:00:00 2001 From: Kevin Waters Date: Fri, 30 Aug 2024 11:00:53 -0400 Subject: [PATCH 17/20] Remove active language in reduction api args --- content/shmem_reductions.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 79f0b42a1..46cb0abe1 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -252,9 +252,7 @@ \subsubsubsection{PROD} contains one element for each separate reduction routine. The type of \source{} should match that implied in the SYNOPSIS section.} \apiargument{IN}{nreduce}{The number of elements in the \dest{} and \source{} - arrays. In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t. - In deprecated active-set based \ac{API} calls, - \VAR{nreduce} must be of type integer.} + arrays.
In teams based \ac{API} calls, \VAR{nreduce} must be of type size\_t.} \begin{DeprecateBlock} \apiargument{IN}{nreduce}{In active-set based \ac{API} calls, From 0b47caf9e8289bd0f3f7d7d49924ce2ef77fff72 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 11:56:43 -0400 Subject: [PATCH 18/20] collectives: clarify src buffer entry requirements --- content/shmem_alltoall.tex | 12 ++++++++---- content/shmem_broadcast.tex | 15 +++++++++------ content/shmem_collect.tex | 11 +++++++++++ content/shmem_reductions.tex | 12 ++++++++---- content/shmem_scan.tex | 13 ++++++++++--- 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4e145c266..90440511e 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -100,12 +100,16 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, - the following conditions must be ensured: + Before any \ac{PE} calls a \FUNC{shmem\_alltoall} routine, the following + conditions must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \VAR{dest} data object on all \acp{PE} in the team is - ready to accept the \FUNC{shmem\_alltoall} data. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be + read by any \ac{PE} in the team. \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index d67c2fb0f..05b670682 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -85,13 +85,16 @@ the team. 
\end{itemize} - Before any \ac{PE} calls a broadcast routine, the following - conditions must be ensured: + Before any \ac{PE} calls a broadcast routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the broadcast - is ready to accept the broadcast data. - \end{itemize} - Otherwise, the behavior is undefined. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local root \ac{PE} is ready to be + read by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index d14d8f17b..479c93e29 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -88,6 +88,17 @@ If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. + Before any \ac{PE} calls a collect routine, the following conditions must + be ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read + by any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. 
+ \begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective operation to concatenate \VAR{nelems} diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 46cb0abe1..888a51e19 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -295,12 +295,16 @@ \subsubsubsection{PROD} If \VAR{team} compares equal to \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a reduction routine, the following conditions must be ensured: + Before any \ac{PE} calls a reduction routine, the following conditions + must be ensured, otherwise the behavior is undefined: \begin{itemize} - \item The \dest{} array on all \acp{PE} participating in the reduction - is ready to accept the results of the \OPR{reduction}. + \item The \dest{} array on all \acp{PE} in the team is ready to + accept the results of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. \end{itemize} - Otherwise, the behavior is undefined. + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 618a51a05..185c52d13 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -86,9 +86,16 @@ \LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior is undefined. - Before any \ac{PE} calls a scan routine, the \dest{} array on all - \acp{PE} participating in the operation must be ready to accept the - results of the operation. Otherwise, the behavior is undefined. 
+ Before any \ac{PE} calls a scan routine, the following conditions must be + ensured, otherwise the behavior is undefined: + \begin{itemize} + \item The \dest{} array on all \acp{PE} in the team is ready to accept + the result of the operation. + \item The \source{} buffer at the local \ac{PE} is ready to be read by + any \ac{PE} in the team. + \end{itemize} + The application does not need to synchronize to ensure that the \source{} + buffer is ready across all \acp{PE} prior to calling this routine. Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array From de1315d654b8ec5881484143e007a4ce3c013d22 Mon Sep 17 00:00:00 2001 From: Muhammad Awad Date: Fri, 30 Aug 2024 11:53:46 -0700 Subject: [PATCH 19/20] Remove unnecessary new line --- content/shmem_alltoall.tex | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 4e145c266..190232d7c 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -89,9 +89,7 @@ Given a \ac{PE} \VAR{i} that is the \kth \ac{PE} participating in the operation and a \ac{PE} \VAR{j} that is the \lth \ac{PE} - participating in the operation, - - \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to + participating in the operation, \ac{PE} \VAR{i} sends the \lth block of its \VAR{source} object to the \kth block of the \VAR{dest} object of \ac{PE} \VAR{j}. 
From 8095ea451dfcd3f5f48da7affc668fe8095b87e3 Mon Sep 17 00:00:00 2001 From: David Ozog Date: Fri, 30 Aug 2024 15:29:21 -0400 Subject: [PATCH 20/20] collectives: "array" instead of source "buffer" --- content/shmem_alltoall.tex | 4 ++-- content/shmem_broadcast.tex | 4 ++-- content/shmem_collect.tex | 4 ++-- content/shmem_reductions.tex | 4 ++-- content/shmem_scan.tex | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/content/shmem_alltoall.tex b/content/shmem_alltoall.tex index 90440511e..ba0b43a77 100644 --- a/content/shmem_alltoall.tex +++ b/content/shmem_alltoall.tex @@ -105,11 +105,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a \FUNC{shmem\_alltoall} routine, the following is true for the local PE: diff --git a/content/shmem_broadcast.tex b/content/shmem_broadcast.tex index 05b670682..bd936b5f8 100644 --- a/content/shmem_broadcast.tex +++ b/content/shmem_broadcast.tex @@ -90,11 +90,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local root \ac{PE} is ready to be + \item The \source{} array at the local root \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. 
Upon return from a team-based broadcast routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_collect.tex b/content/shmem_collect.tex index 479c93e29..b7e2d3fac 100644 --- a/content/shmem_collect.tex +++ b/content/shmem_collect.tex @@ -93,11 +93,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. \begin{DeprecateBlock} \openshmem \FUNC{collect} and \FUNC{fcollect} routines perform a collective diff --git a/content/shmem_reductions.tex b/content/shmem_reductions.tex index 888a51e19..fa48bb3d8 100644 --- a/content/shmem_reductions.tex +++ b/content/shmem_reductions.tex @@ -300,11 +300,11 @@ \subsubsubsection{PROD} \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the results of the operation. - \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a reduction routine, the following are true for the local \ac{PE}: diff --git a/content/shmem_scan.tex b/content/shmem_scan.tex index 185c52d13..35338a517 100644 --- a/content/shmem_scan.tex +++ b/content/shmem_scan.tex @@ -91,11 +91,11 @@ \begin{itemize} \item The \dest{} array on all \acp{PE} in the team is ready to accept the result of the operation. 
- \item The \source{} buffer at the local \ac{PE} is ready to be read by + \item The \source{} array at the local \ac{PE} is ready to be read by any \ac{PE} in the team. \end{itemize} The application does not need to synchronize to ensure that the \source{} - buffer is ready across all \acp{PE} prior to calling this routine. + array is ready across all \acp{PE} prior to calling this routine. Upon return from a scan routine, the following are true for the local \ac{PE}: the \dest{} array is updated, and the \source{} array