Skip to content

Commit

Permalink
Merge pull request #448 from jdinan/pr/ib-ops
Browse files Browse the repository at this point in the history
Add Block-Strided RMA Operations
  • Loading branch information
jdinan authored Oct 4, 2023
2 parents 91a2495 + 387ef8e commit a0b22f7
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 21 deletions.
9 changes: 7 additions & 2 deletions content/backmatter.tex
Original file line number Diff line number Diff line change
Expand Up @@ -654,12 +654,17 @@ \subsection{Table~\ref{p2psynctypes}: point-to-point synchronization types}
\chapter{Changes to this Document}\label{sec:changelog}

\section{Version 1.6}
Major changes in \openshmem[1.6] include the addition of a new
\FUNC{shmem\_team\_ptr} function.
Major changes in \openshmem[1.6] include the addition of the new
\FUNC{shmem\_team\_ptr}, \FUNC{shmem\_ibget}, and \FUNC{shmem\_ibput}
functions.

The following list describes the specific changes in \openshmem[1.6]:
\begin{itemize}
%
\item Added interleaved block transfer APIs \FUNC{shmem\_ibget} and
\FUNC{shmem\_ibput}.
\ChangelogRef{subsec:shmem_ibget, subsec:shmem_ibput}%
%
\item Added \FUNC{shmem\_signal\_add} and \FUNC{shmem\_signal\_set} to
update a remote flag without the associated data transfer of a put-with-signal operation.
\ChangelogRef{subsec:shmem_signal_add, subsec:shmem_signal_set}%
Expand Down
65 changes: 65 additions & 0 deletions content/shmem_ibget.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
\apisummary{
Copies blocks of strided data from a specified \ac{PE}.
}

\begin{apidefinition}

\begin{C11synopsis}
void @\FuncDecl{shmem\_ibget}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ibget}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{C11synopsis}
where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}.

\begin{Csynopsis}
void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_ibget}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_ibget}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{Csynopsis}
where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}.

\begin{CsynopsisCol}
void @\FuncDecl{shmem\_ibget\FuncParam{SIZE}}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ctx\_ibget\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{CsynopsisCol}
where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}.

\begin{apiarguments}
\apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation.
When this argument is not provided, the operation is performed on
the default context.}
\apiargument{OUT}{dest}{Local address of the array to be updated.
The type of \dest{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{source}{Symmetric address of the source array data object.
The type of \source{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{dst}{The stride between consecutive blocks of the \dest{}
array. The stride must be greater than or equal to \VAR{bsize} and is
scaled by the element size of the \dest{} array. A value of \VAR{bsize}
indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive blocks of the \source{}
array. The stride must be greater than or equal to \VAR{bsize} and is
scaled by the element size of the \source{} array. A value of \VAR{bsize}
indicates contiguous data.}
\apiargument{IN}{bsize}{Number of elements per block in the \dest{} and \source{}
arrays.}
\apiargument{IN}{nblocks}{Number of blocks to be copied from the \source{} array
to the \dest{} array.}
\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.}
\end{apiarguments}

\apidescription{
The \FUNC{shmem\_ibget} routines provide a method for copying strided data blocks from
a symmetric array on a specified remote \ac{PE} to strided locations on a
local array. The routines return when the data has been copied into the local
\VAR{dest} array.
}

\apireturnvalues{
None.
}

\apinotes{
The \FUNC{shmem\_ibget} API provides a more general purpose interleaved
transfer API than \FUNC{shmem\_iget}. Calling \FUNC{shmem\_ibget} with a
\VAR{bsize} of \CONST{1} is equivalent to calling \FUNC{shmem\_iget} with
\VAR{nelems} equal to \VAR{nblocks}.
}

\end{apidefinition}
68 changes: 68 additions & 0 deletions content/shmem_ibput.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
\apisummary{
Copies strided data blocks to a specified \ac{PE}.
}

\begin{apidefinition}

\begin{C11synopsis}
void @\FuncDecl{shmem\_ibput}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ibput}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{C11synopsis}
where \TYPE{} is one of the standard \ac{RMA} types specified by Table \ref{stdrmatypes}.

\begin{Csynopsis}
void @\FuncDecl{shmem\_\FuncParam{TYPENAME}\_ibput}@(TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ctx\_\FuncParam{TYPENAME}\_ibput}@(shmem_ctx_t ctx, TYPE *dest, const TYPE *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{Csynopsis}
where \TYPE{} is one of the standard \ac{RMA} types and has a corresponding \TYPENAME{} specified by Table \ref{stdrmatypes}.

\begin{CsynopsisCol}
void @\FuncDecl{shmem\_ibput\FuncParam{SIZE}}@(void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
void @\FuncDecl{shmem\_ctx\_ibput\FuncParam{SIZE}}@(shmem_ctx_t ctx, void *dest, const void *source, ptrdiff_t dst, ptrdiff_t sst, size_t bsize, size_t nblocks, int pe);
\end{CsynopsisCol}
where \SIZE{} is one of \CONST{8, 16, 32, 64, 128}.

\begin{apiarguments}
\apiargument{IN}{ctx}{A context handle specifying the context on which to perform the operation.
When this argument is not provided, the operation is performed on
the default context.}
\apiargument{OUT}{dest}{Symmetric address of the destination array data object.
The type of \dest{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{source}{Local address of the array containing the data to be copied.
The type of \source{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{dst}{The stride between consecutive blocks of the \dest{}
array. The stride must be greater than or equal to \VAR{bsize} and is
scaled by the element size of the \dest{} array. A value of \VAR{bsize}
indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive blocks of the \source{}
array. The stride must be greater than or equal to \VAR{bsize} and is
scaled by the element size of the \source{} array. A value of \VAR{bsize}
indicates contiguous data.}
\apiargument{IN}{bsize}{Number of elements per block in the \dest{} and \source{}
arrays.}
\apiargument{IN}{nblocks}{Number of blocks to be copied from the \source{} array
to the \dest{} array.}
\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.}
\end{apiarguments}


\apidescription{
The \FUNC{shmem\_ibput} routines provide a method for copying strided data
blocks (specified by \VAR{sst}) of an array from a \source{} array on the
local \ac{PE} to locations specified by stride \VAR{dst} on a \dest{} array
on a specified remote \ac{PE}. The routines return when the data has
been copied out of the \VAR{source} array on the local \ac{PE} but not
necessarily before the data has been delivered to the remote data object.
}

\apireturnvalues{
None.
}

\apinotes{
The \FUNC{shmem\_ibput} API provides a more general purpose interleaved
transfer API than \FUNC{shmem\_iput}. Calling \FUNC{shmem\_ibput} with a
\VAR{bsize} of \CONST{1} is equivalent to calling \FUNC{shmem\_iput} with
\VAR{nelems} equal to \VAR{nblocks}.
}

\end{apidefinition}
14 changes: 8 additions & 6 deletions content/shmem_iget.tex
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,20 @@
\apiargument{IN}{source}{Symmetric address of the source array data object.
The type of \source{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{dst}{The stride between consecutive elements of the \dest{}
array. The stride is scaled by the element size of the \dest{} array.
A value of \CONST{1} indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive elements of the
\source{} array. The stride is scaled by the element size of the \source{}
array. A value of \CONST{1} indicates contiguous data.}
array. The stride must be greater than or equal to \CONST{1} and is
scaled by the element size of the \dest{} array. A value of \CONST{1}
indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive elements of the \source{}
array. The stride must be greater than or equal to \CONST{1} and is
scaled by the element size of the \source{} array. A value of \CONST{1}
indicates contiguous data.}
\apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{}
arrays.}
\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.}
\end{apiarguments}

\apidescription{
The \FUNC{iget} routines provide a method for copying strided data elements from
The \FUNC{shmem\_iget} routines provide a method for copying strided data elements from
a symmetric array on a specified remote \ac{PE} to strided locations on a
local array. The routines return when the data has been copied into the local
\VAR{dest} array.
Expand Down
22 changes: 9 additions & 13 deletions content/shmem_iput.tex
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,23 @@
\apiargument{IN}{source}{Local address of the array containing the data to be copied.
The type of \source{} should match that implied in the SYNOPSIS section.}
\apiargument{IN}{dst}{The stride between consecutive elements of the \dest{}
array. The stride is scaled by the element size of the \dest{} array. A
value of \CONST{1} indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive elements of the
\source{} array. The stride is scaled by the element size of the \source{}
array. A value of \CONST{1} indicates contiguous data.}
array. The stride must be greater than or equal to \CONST{1} and is
scaled by the element size of the \dest{} array. A value of \CONST{1}
indicates contiguous data.}
\apiargument{IN}{sst}{The stride between consecutive elements of the \source{}
array. The stride must be greater than or equal to \CONST{1} and is
scaled by the element size of the \source{} array. A value of \CONST{1}
indicates contiguous data.}
\apiargument{IN}{nelems}{Number of elements in the \dest{} and \source{} arrays.}
\apiargument{IN}{pe}{\ac{PE} number of the remote \ac{PE}.}
\end{apiarguments}


\apidescription{
The \FUNC{iput} routines provide a method for copying strided data
The \FUNC{shmem\_iput} routines provide a method for copying strided data
elements (specified by \VAR{sst}) of an array from a \source{} array on the
local \ac{PE} to locations specified by stride \VAR{dst} on a \dest{} array
on specified remote \ac{PE}. Both strides, \VAR{dst} and \VAR{sst}, must be
greater than or equal to \CONST{1}. The routines return when the data has
on a specified remote \ac{PE}. The routines return when the data has
been copied out of the \VAR{source} array on the local \ac{PE} but not
necessarily before the data has been delivered to the remote data object.
}
Expand All @@ -55,11 +56,6 @@
None.
}

\apinotes{
See Section \ref{subsec:memory_model} for a definition of the term
remotely accessible.
}

\begin{apiexamples}

\apicexample
Expand Down
6 changes: 6 additions & 0 deletions main_spec.tex
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ \subsubsubsection{\textbf{SHMEM\_P}}\label{subsec:shmem_p}
\subsubsubsection{\textbf{SHMEM\_IPUT}}\label{subsec:shmem_iput}
\input{content/shmem_iput.tex}

\subsubsubsection{\textbf{SHMEM\_IBPUT}}\label{subsec:shmem_ibput}
\input{content/shmem_ibput.tex}

\subsubsubsection{\textbf{SHMEM\_GET}}\label{subsec:shmem_get}
\input{content/shmem_get.tex}

Expand All @@ -189,6 +192,9 @@ \subsubsubsection{\textbf{SHMEM\_G}}\label{subsec:shmem_g}
\subsubsubsection{\textbf{SHMEM\_IGET}}\label{subsec:shmem_iget}
\input{content/shmem_iget.tex}

\subsubsubsection{\textbf{SHMEM\_IBGET}}\label{subsec:shmem_ibget}
\input{content/shmem_ibget.tex}

\subsubsection{Nonblocking Remote Memory Access Routines}\label{subsec:rma_nbi}

\subsubsubsection{\textbf{SHMEM\_PUT\_NBI}}\label{subsec:shmem_put_nbi}
Expand Down

0 comments on commit a0b22f7

Please sign in to comment.