Skip to content

Commit

Permalink
Merge pull request IntelPython#1236 from IntelPython/feature/enable_s…
Browse files Browse the repository at this point in the history
…pirv_to_device_caching

Enable SPIR-V to device caching
  • Loading branch information
ZzEeKkAa authored Dec 12, 2023
2 parents 11e245c + a8178c6 commit 8fc7fbd
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 50 deletions.
30 changes: 30 additions & 0 deletions LICENSES.third-party
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
The numba-dpex source tree includes vendored libraries governed by the following
licenses.


boost hash.hpp header
---------------------

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
8 changes: 8 additions & 0 deletions numba_dpex/core/runtime/_dpexrt_python.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "_queuestruct.h"
#include "_usmarraystruct.h"

#include "experimental/kernel_caching.h"
#include "experimental/nrt_reserve_meminfo.h"
#include "numba/core/runtime/nrt_external.h"

Expand Down Expand Up @@ -1493,6 +1494,8 @@ static PyObject *build_c_helpers_dict(void)
_declpointer("DPEXRT_sycl_event_init", &DPEXRT_sycl_event_init);
_declpointer("DPEXRT_nrt_acquire_meminfo_and_schedule_release",
&DPEXRT_nrt_acquire_meminfo_and_schedule_release);
_declpointer("DPEXRT_build_or_get_kernel", &DPEXRT_build_or_get_kernel);
_declpointer("DPEXRT_kernel_cache_size", &DPEXRT_kernel_cache_size);

#undef _declpointer
return dct;
Expand Down Expand Up @@ -1563,6 +1566,11 @@ MOD_INIT(_dpexrt_python)
PyModule_AddObject(
m, "DPEXRT_nrt_acquire_meminfo_and_schedule_release",
PyLong_FromVoidPtr(&DPEXRT_nrt_acquire_meminfo_and_schedule_release));
PyModule_AddObject(m, "DPEXRT_build_or_get_kernel",
PyLong_FromVoidPtr(&DPEXRT_build_or_get_kernel));
PyModule_AddObject(m, "DPEXRT_kernel_cache_size",
PyLong_FromVoidPtr(&DPEXRT_kernel_cache_size));

PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
return MOD_SUCCESS_VAL(m);
}
53 changes: 53 additions & 0 deletions numba_dpex/core/runtime/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,3 +471,56 @@ def acquire_meminfo_and_schedule_release(
ret = builder.call(fn, args)

return ret

def build_or_get_kernel(self, builder: llvmir.IRBuilder, args):
"""Inserts LLVM IR to call build_or_get_kernel.
DPCTLSyclKernelRef
DPEXRT_build_or_get_kernel(
const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name,
);
"""
mod = builder.module

func_ty = llvmir.FunctionType(
cgutils.voidptr_t,
[
cgutils.voidptr_t,
cgutils.voidptr_t,
llvmir.IntType(64),
cgutils.voidptr_t,
llvmir.IntType(64),
cgutils.voidptr_t,
cgutils.voidptr_t,
],
)
fn = cgutils.get_or_insert_function(
mod, func_ty, "DPEXRT_build_or_get_kernel"
)
ret = builder.call(fn, args)

return ret

def kernel_cache_size(self, builder: llvmir.IRBuilder):
"""Inserts LLVM IR to call kernel_cache_size.
size_t DPEXRT_kernel_cache_size();
"""
fn = cgutils.get_or_insert_function(
builder.module,
llvmir.FunctionType(
llvmir.IntType(64),
[],
),
"DPEXRT_kernel_cache_size",
)

return builder.call(fn, [])
111 changes: 111 additions & 0 deletions numba_dpex/core/runtime/experimental/kernel_caching.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include "kernel_caching.h"
#include <unordered_map>

extern "C"
{
#include "dpctl_capi.h"
#include "dpctl_sycl_interface.h"

#include "_dbg_printer.h"

#include "numba/core/runtime/nrt_external.h"
}

#include "syclinterface/dpctl_sycl_type_casters.hpp"
#include "tools/boost_hash.hpp"
#include "tools/dpctl.hpp"

using CacheKey = std::tuple<DPCTLSyclContextRef, DPCTLSyclDeviceRef, size_t>;

namespace std
{
template <> struct hash<CacheKey>
{
size_t operator()(const CacheKey &ck) const
{
std::size_t seed = 0;
boost::hash_combine(seed, std::get<DPCTLSyclDeviceRef>(ck));
boost::hash_combine(seed, std::get<DPCTLSyclContextRef>(ck));
boost::hash_detail::hash_combine_impl(seed, std::get<size_t>(ck));
return seed;
}
};
template <> struct equal_to<CacheKey>
{
constexpr bool operator()(const CacheKey &lhs, const CacheKey &rhs) const
{
return DPCTLDevice_AreEq(std::get<DPCTLSyclDeviceRef>(lhs),
std::get<DPCTLSyclDeviceRef>(rhs)) &&
DPCTLContext_AreEq(std::get<DPCTLSyclContextRef>(lhs),
std::get<DPCTLSyclContextRef>(rhs)) &&
std::get<size_t>(lhs) == std::get<size_t>(rhs);
}
};
} // namespace std

// TODO: add cache cleaning
// https://github.com/IntelPython/numba-dpex/issues/1240
std::unordered_map<CacheKey, DPCTLSyclKernelRef> sycl_kernel_cache =
std::unordered_map<CacheKey, DPCTLSyclKernelRef>();

template <class M, class Key, class F>
typename M::mapped_type &get_else_compute(M &m, Key const &k, F f)
{
typedef typename M::mapped_type V;
std::pair<typename M::iterator, bool> r =
m.insert(typename M::value_type(k, V()));
V &v = r.first->second;
if (r.second) {
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: building kernel.\n"););
f(v);
}
else {
DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: using cached kernel.\n"););
DPCTLDevice_Delete(std::get<DPCTLSyclDeviceRef>(k));
DPCTLContext_Delete(std::get<DPCTLSyclContextRef>(k));
}
return v;
}

extern "C"
{
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name)
{
DPEXRT_DEBUG(
drt_debug_print("DPEXRT-DEBUG: in build or get kernel.\n"););

CacheKey key = std::make_tuple(ctx, dev, il_hash);

DPEXRT_DEBUG(auto ctx_hash = std::hash<DPCTLSyclContextRef>{}(ctx);
auto dev_hash = std::hash<DPCTLSyclDeviceRef>{}(dev);
drt_debug_print("DPEXRT-DEBUG: key hashes: %d %d %d.\n",
ctx_hash, dev_hash, il_hash););

auto k_ref = get_else_compute(
sycl_kernel_cache, key,
[ctx, dev, il, il_length, compile_opts,
kernel_name](DPCTLSyclKernelRef &k_ref) {
auto kb_ref = DPCTLKernelBundle_CreateFromSpirv(
ctx, dev, il, il_length, compile_opts);
k_ref = DPCTLKernelBundle_GetKernel(kb_ref, kernel_name);
DPCTLKernelBundle_Delete(kb_ref);
});

DPEXRT_DEBUG(drt_debug_print("DPEXRT-DEBUG: kernel hash size: %d.\n",
sycl_kernel_cache.size()););

return DPCTLKernel_Copy(k_ref);
}

size_t DPEXRT_kernel_cache_size() { return sycl_kernel_cache.size(); }
}
54 changes: 54 additions & 0 deletions numba_dpex/core/runtime/experimental/kernel_caching.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// SPDX-FileCopyrightText: 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

//===----------------------------------------------------------------------===//
///
/// \file
/// Defines dpex run time function(s) that cache kernel on device.
///
//===----------------------------------------------------------------------===//

#pragma once

#include "dpctl_capi.h"
#include "dpctl_sycl_interface.h"

#ifdef __cplusplus
extern "C"
{
#endif
/*!
* @brief returns dpctl kernel reference for the SPIRV file on particular
* device. Compiles only first time, all others will use cache for the same
* input. It steals reference to context and device because we need to keep
* it alive for cache keys.
*
* @param ctx Context reference,
* @param dev Device reference,
* @param il_hash Hash of the SPIRV binary data,
* @param il SPIRV binary data,
* @param il_length SPIRV binary data size,
* @param compile_opts compile options,
* @param kernel_name kernel name inside SPIRV binary data to return
* reference to.
*
* @return {return} Kernel reference to the compiled SPIR-V.
*/
DPCTLSyclKernelRef DPEXRT_build_or_get_kernel(const DPCTLSyclContextRef ctx,
const DPCTLSyclDeviceRef dev,
size_t il_hash,
const char *il,
size_t il_length,
const char *compile_opts,
const char *kernel_name);

/*!
* @brief returns cache size. Intended for test purposes only
*
* @return {return} Kernel cache size.
*/
size_t DPEXRT_kernel_cache_size();
#ifdef __cplusplus
}
#endif
2 changes: 1 addition & 1 deletion numba_dpex/core/runtime/experimental/nrt_reserve_meminfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// Defines dpctl style function(s) that interruct with nrt meminfo and sycl.
/// Defines dpctl style function(s) that interact with nrt meminfo and sycl.
///
//===----------------------------------------------------------------------===//

Expand Down
38 changes: 38 additions & 0 deletions numba_dpex/core/runtime/experimental/tools/boost_hash.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2005-2014 Daniel James.
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// Based on Peter Dimov's proposal
// http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2005/n1756.pdf
// issue 6.18.
//
// This also contains public domain code from MurmurHash. From the
// MurmurHash header:

// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// 2023 Intel Corporation
// Copied hash_combine and hash_combine_impl from boost
// (https://www.boost.org/doc/libs/1_76_0/boost/container_hash/hash.hpp) and
// changed hash_combine to use std::hash<T> instead of boost::hash<T>.

#include <functional>

namespace boost
{
namespace hash_detail
{
template <typename SizeT>
inline void hash_combine_impl(SizeT &seed, SizeT value)
{
seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
} // namespace hash_detail

template <class T> inline void hash_combine(std::size_t &seed, T const &v)
{
std::hash<T> hasher;
return boost::hash_detail::hash_combine_impl(seed, hasher(v));
}
} // namespace boost
24 changes: 24 additions & 0 deletions numba_dpex/core/runtime/experimental/tools/dpctl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

#include "dpctl.hpp"
#include <CL/sycl.hpp>

namespace std
{

size_t
hash<DPCTLSyclDeviceRef>::operator()(const DPCTLSyclDeviceRef &DRef) const
{
using dpctl::syclinterface::unwrap;
return hash<sycl::device>()(*unwrap<sycl::device>(DRef));
}

size_t
hash<DPCTLSyclContextRef>::operator()(const DPCTLSyclContextRef &CRef) const
{
using dpctl::syclinterface::unwrap;
return hash<sycl::context>()(*unwrap<sycl::context>(CRef));
}
} // namespace std
26 changes: 26 additions & 0 deletions numba_dpex/core/runtime/experimental/tools/dpctl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// SPDX-FileCopyrightText: 2020 - 2023 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0

//===----------------------------------------------------------------------===//
///
/// \file
/// Defines overloads to dpctl library that eventually must be ported there.
///
//===----------------------------------------------------------------------===//

#pragma once
#include "syclinterface/dpctl_sycl_type_casters.hpp"

namespace std
{
template <> struct hash<DPCTLSyclDeviceRef>
{
size_t operator()(const DPCTLSyclDeviceRef &DRef) const;
};

template <> struct hash<DPCTLSyclContextRef>
{
size_t operator()(const DPCTLSyclContextRef &CRef) const;
};
} // namespace std
Loading

0 comments on commit 8fc7fbd

Please sign in to comment.