Skip to content

Commit

Permalink
Add ability of VTune profiling (#2987)
Browse files Browse the repository at this point in the history
To enable kernel profiling in a oneDAL build, pass the REQPROFILE=yes option to the makefile.
After that, run VTune profiling as usual to collect performance profiling data.
  • Loading branch information
Vika-F authored Nov 27, 2024
1 parent 713ac65 commit f32ae79
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 11 deletions.
10 changes: 9 additions & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ is available as an alternative to the manual setup.
4. Set up MKL:

_Note: if you used the general oneAPI setvars script from a Base Toolkit installation, this step will not be necessary as oneMKL will already have been set up._

Download and install [Intel(R) oneMKL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html).
Set the environment variables for Intel(R) oneMKL. For example:

Expand Down Expand Up @@ -149,7 +149,15 @@ It is possible to build oneDAL libraries with selected set of algorithms and/or

make -f makefile daal PLAT=win32e CORE.ALGORITHMS.CUSTOM=low_order_moments REQCPU=avx2 -j16

On **Linux\*** it is possible to build a debug version of oneDAL or a version that supports kernel profiling using `<ittnotify.h>`.

- To build a debug version of oneDAL, run:

make -f makefile daal oneapi_c PLAT=lnx32e REQDBG=yes

- To build oneDAL with kernel profiling information, run:

make -f makefile daal oneapi_c PLAT=lnx32e REQPROFILE=yes

---
**NOTE:** Built libraries are located in the `__release_{os_name}[_{compiler_name}]/daal` directory.
Expand Down
26 changes: 26 additions & 0 deletions cpp/daal/src/externals/service_profiler.cpp
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ namespace daal
{
namespace internal
{
#ifdef ONEDAL_KERNEL_PROFILER

/**
 * Begins an ITT task named taskName in the profiler's ITT domain.
 * The matching end call is issued by the destructor through Profiler::endTask().
 * \param[in] taskName Name of the task; the pointer is stored as-is, the string is not copied.
 */
ProfilerTask::ProfilerTask(const char * taskName)
    : _taskName(taskName),
      _handle(__itt_string_handle_create(taskName)),
      /* _domain used to be left uninitialized; keep the domain the task is started in */
      _domain(Profiler::getDomain())
{
    __itt_task_begin(_domain, __itt_null, __itt_null, _handle);
}

/* Ends the task when the object goes out of scope by delegating to Profiler::endTask(). */
ProfilerTask::~ProfilerTask()
{
Profiler::endTask(_taskName);
}

/* Creates a ProfilerTask for taskName and returns it by value.
   In the profiling build the ProfilerTask constructor is what begins the ITT task. */
ProfilerTask Profiler::startTask(const char * taskName)
{
return ProfilerTask(taskName);
}

/* Calls __itt_task_end() on the profiler's ITT domain.
   taskName is not used here: only the domain is passed to the ITT call. */
void Profiler::endTask(const char * taskName)
{
__itt_task_end(Profiler::getDomain());
}

#else
ProfilerTask Profiler::startTask(const char * taskName)
{
return ProfilerTask(taskName);
Expand All @@ -34,6 +59,7 @@ ProfilerTask::~ProfilerTask()
{
Profiler::endTask(_taskName);
}
#endif

} // namespace internal
} // namespace daal
69 changes: 67 additions & 2 deletions cpp/daal/src/externals/service_profiler.h
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -24,34 +24,99 @@
#ifndef __SERVICE_PROFILER_H__
#define __SERVICE_PROFILER_H__

#ifdef ONEDAL_KERNEL_PROFILER
/* Here if oneDAL kernel profiling is enabled in the build */
#include <ittnotify.h>
#endif

#define DAAL_ITTNOTIFY_CONCAT2(x, y) x##y
#define DAAL_ITTNOTIFY_CONCAT(x, y) DAAL_ITTNOTIFY_CONCAT2(x, y)

#define DAAL_ITTNOTIFY_UNIQUE_ID __LINE__

#define DAAL_ITTNOTIFY_SCOPED_TASK(name) \
daal::internal::ProfilerTask DAAL_ITTNOTIFY_CONCAT(__profiler_taks__, DAAL_ITTNOTIFY_UNIQUE_ID) = daal::internal::Profiler::startTask(#name);
daal::internal::ProfilerTask DAAL_ITTNOTIFY_CONCAT(__profiler_task__, DAAL_ITTNOTIFY_UNIQUE_ID) = daal::internal::Profiler::startTask(#name);

namespace daal
{
namespace internal
{
/**
 * Defines a logical unit of work to be tracked by the performance profiler.
 *
 * NOTE(review): no copy operations are declared, so the class is implicitly copyable;
 * a copy would run the destructor, and therefore end the task, a second time.
 */
class ProfilerTask
{
public:
    /**
     * Constructs a task with a given name.
     * \param[in] taskName Name of the task.
     */
    ProfilerTask(const char * taskName);

    /**
     * Ends the task through Profiler::endTask().
     */
    ~ProfilerTask();

private:
    const char * _taskName; /* Name of the task as passed to the constructor */
#ifdef ONEDAL_KERNEL_PROFILER
    /* Here if oneDAL kernel profiling is enabled */
    __itt_string_handle * _handle = nullptr; /* The task string handle */
    __itt_domain * _domain        = nullptr; /* Pointer to the domain of the task */
#endif
};

// This class is a stub in the library. Its redefinition will be in Bechmarks
/**
 * Global performance profiler.
 *
 * By default this class is a stub in the library and its redefinition will be in C++ Benchmarks.
 * If oneDAL kernel profiling is enabled, the profiler uses the Task API from <ittnotify.h>.
 */
class Profiler
{
public:
    /**
     * Start the task to be profiled.
     * \param[in] taskName Name of the task.
     * \return Task object created for taskName.
     */
    static ProfilerTask startTask(const char * taskName);

    /**
     * End the task that is being profiled.
     * \param[in] taskName Name of the task.
     */
    static void endTask(const char * taskName);

#ifdef ONEDAL_KERNEL_PROFILER
    /* Here if oneDAL kernel profiling is enabled */

    /**
     * Get pointer to a global profiler state.
     * The state is a function-local static object, so it is created on the first call.
     * \return Pointer to a global profiler state.
     */
    static Profiler * getInstance()
    {
        static Profiler instance;
        return &instance;
    }

    /**
     * Get pointer to the ITT domain associated with the profiler.
     * \return Pointer to the ITT domain.
     */
    static __itt_domain * getDomain()
    {
        return getInstance()->_domain;
    }

private:
    /**
     * Construct the profiler and create its "oneDAL" ITT domain.
     */
    Profiler() : _domain(__itt_domain_create("oneDAL")) {}
    ~Profiler() {}

    __itt_domain * _domain; /* Pointer to the ITT domain */
#endif
};

} // namespace internal
Expand Down
28 changes: 20 additions & 8 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,15 @@ endif

include dev/make/deps.$(BACKEND_CONFIG).mk

#=============================== VTune SDK folders ======================================

# Kernel profiling support, enabled with REQPROFILE=yes.
# Defines the ONEDAL_KERNEL_PROFILER compiler define and the locations of the ITT headers
# and static library, all derived from $(VTUNE_PROFILER_DIR).
# NOTE(review): VTUNE_PROFILER_DIR is not set in this section - presumably it comes from the
# VTune environment script; confirm it is defined before building with REQPROFILE=yes.
ifeq ($(REQPROFILE), yes)
-DPROFILER := -DONEDAL_KERNEL_PROFILER
VTUNESDK.include := $(VTUNE_PROFILER_DIR)/sdk/include
# The library paths are filled in only when OS_is_lnx is set; otherwise they expand to nothing.
VTUNESDK.libia := $(if $(OS_is_lnx), $(VTUNE_PROFILER_DIR)/sdk/lib64,)
VTUNESDK.LIBS_A := $(if $(OS_is_lnx), $(VTUNESDK.libia)/libittnotify.a,)
endif

#===============================================================================
# Release library names
#===============================================================================
Expand Down Expand Up @@ -436,9 +445,11 @@ CORE.srcdirs := $(CORE.SERV.srcdir) $(CORE.srcdir) \
$(CPPDIR.daal)/src/data_management

CORE.incdirs.common := $(RELEASEDIR.include) $(CPPDIR.daal) $(WORKDIR)
CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(TBBDIR.include)
CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(VTUNESDK.include) $(TBBDIR.include)
CORE.incdirs := $(CORE.incdirs.common) $(CORE.incdirs.thirdp)

$(info CORE.incdirs: $(CORE.incdirs))

containing = $(foreach v,$2,$(if $(findstring $1,$v),$v))
notcontaining = $(foreach v,$2,$(if $(findstring $1,$v),,$v))
cpy = cp -fp "$<" "$@"
Expand Down Expand Up @@ -479,7 +490,7 @@ $(CORE.tmpdir_a)/$(core_a:%.$a=%_link.txt): $(CORE.objs_a) | $(CORE.tmpdir_a)/.
$(CORE.tmpdir_a)/$(core_a:%.$a=%_link.$a): LOPT:=
$(CORE.tmpdir_a)/$(core_a:%.$a=%_link.$a): $(CORE.tmpdir_a)/$(core_a:%.$a=%_link.txt) | $(CORE.tmpdir_a)/. ; $(LINK.STATIC)
$(WORKDIR.lib)/$(core_a): LOPT:=
$(WORKDIR.lib)/$(core_a): $(daaldep.math_backend.ext) $(CORE.tmpdir_a)/$(core_a:%.$a=%_link.$a) ; $(LINK.STATIC)
$(WORKDIR.lib)/$(core_a): $(daaldep.math_backend.ext) $(VTUNESDK.LIBS_A) $(CORE.tmpdir_a)/$(core_a:%.$a=%_link.$a) ; $(LINK.STATIC)

$(WORKDIR.lib)/$(core_y): LOPT += $(-fPIC)
$(WORKDIR.lib)/$(core_y): LOPT += $(daaldep.rt.seq)
Expand All @@ -488,11 +499,11 @@ ifdef OS_is_win
$(WORKDIR.lib)/$(core_y:%.$(MAJORBINARY).dll=%_dll.lib): $(WORKDIR.lib)/$(core_y)
endif
$(CORE.tmpdir_y)/$(core_y:%.$y=%_link.txt): $(CORE.objs_y) $(if $(OS_is_win),$(CORE.tmpdir_y)/dll.res,) | $(CORE.tmpdir_y)/. ; $(WRITE.PREREQS)
$(WORKDIR.lib)/$(core_y): $(daaldep.math_backend.ext) \
$(WORKDIR.lib)/$(core_y): $(daaldep.math_backend.ext) $(VTUNESDK.LIBS_A) \
$(CORE.tmpdir_y)/$(core_y:%.$y=%_link.txt) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST)

$(CORE.objs_a): $(CORE.tmpdir_a)/inc_a_folders.txt
$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64)
$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER)
$(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \
-DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \
$(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG)
Expand All @@ -501,7 +512,7 @@ $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt
$(eval $(call append_uarch_copt,$(CORE.objs_a)))

$(CORE.objs_y): $(CORE.tmpdir_y)/inc_y_folders.txt
$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64)
$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER)
$(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \
-D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \
-DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \
Expand Down Expand Up @@ -552,7 +563,7 @@ PARAMETERS.tmpdir_a.dpc := $(WORKDIR)/parameters_dpc_static
PARAMETERS.tmpdir_y.dpc := $(WORKDIR)/parameters_dpc_dynamic

ONEAPI.incdirs.common := $(CPPDIR)
ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend_oneapi.incdir) $(TBBDIR.include)
ONEAPI.incdirs.thirdp := $(CORE.incdirs.common) $(daaldep.math_backend_oneapi.incdir) $(VTUNESDK.include) $(TBBDIR.include)
ONEAPI.incdirs := $(ONEAPI.incdirs.common) $(CORE.incdirs.thirdp) $(ONEAPI.incdirs.thirdp)

ONEAPI.dispatcher_cpu = $(WORKDIR)/oneapi/dal/_dal_cpu_dispatcher_gen.hpp
Expand Down Expand Up @@ -697,7 +708,7 @@ $(ONEAPI.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DMKL_ILP64) $(-DEBC) $(-E
$(eval $(call update_copt_from_dispatcher_tag,$(ONEAPI.objs_y)))

# Note: The libonedal_dpc.so library does not support debug mode.
# When compiling with the debug flag $(-DEBC_DPCPP), linking with libonedal_dpc.so may cause indefinite linking times
# When compiling with the debug flag $(-DEBC_DPCPP), linking with libonedal_dpc.so may cause indefinite linking times
# due to the extensive processing of debug information. For debugging, please use the static library version (libonedal_dpc.a).
$(ONEAPI.objs_y.dpc): $(ONEAPI.dispatcher_cpu) $(ONEAPI.tmpdir_y.dpc)/inc_y_folders.txt
$(ONEAPI.objs_y.dpc): COPT += $(-fPIC) $(-cxx17) $(-Zl_DPCPP) $(-DMKL_ILP64) $(-EHsc) $(pedantic.opts.dpcpp) \
Expand Down Expand Up @@ -1103,7 +1114,8 @@ Flags:
possible values: $(CORE.ALGORITHMS.CUSTOM.AVAILABLE)
REQCPU - list of CPU optimizations to be included into library
possible values: $(CPUs)
REQDBG - Flag that enables build in debug mode
REQDBG - flag that enables build in debug mode
REQPROFILE - flag that enables kernel profiling using <ittnotify.h>
endef

daal_dbg:
Expand Down

0 comments on commit f32ae79

Please sign in to comment.