From 326e2f9619a902accc3a829980dc2e6fb9b61a0d Mon Sep 17 00:00:00 2001 From: sitaowang1998 Date: Fri, 29 Nov 2024 22:54:45 -0500 Subject: [PATCH] feat: Add client interface. (#16) Co-authored-by: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- src/spider/.clang-format | 2 +- src/spider/CMakeLists.txt | 47 ++++++++- src/spider/client/Data.hpp | 82 +++++++++++++++ src/spider/client/Driver.hpp | 139 +++++++++++++++++++++++++ src/spider/client/Exception.hpp | 37 +++++++ src/spider/client/Job.hpp | 72 +++++++++++++ src/spider/client/TaskContext.hpp | 117 +++++++++++++++++++++ src/spider/client/TaskGraph.hpp | 17 +++ src/spider/client/spider.hpp | 13 +++ src/spider/client/task.hpp | 50 +++++++++ src/spider/client/type_utils.hpp | 31 ++++++ src/spider/core/Data.hpp | 5 +- src/spider/core/Serializer.hpp | 20 +++- src/spider/core/Task.hpp | 5 +- src/spider/core/TaskGraph.hpp | 8 +- src/spider/storage/DataStorage.hpp | 3 +- src/spider/storage/MetadataStorage.hpp | 3 +- src/spider/storage/MysqlStorage.cpp | 22 ++-- src/spider/storage/MysqlStorage.hpp | 7 +- src/spider/worker/FunctionManager.hpp | 12 +-- tests/worker/test-FunctionManager.cpp | 6 +- 21 files changed, 662 insertions(+), 36 deletions(-) create mode 100644 src/spider/client/Data.hpp create mode 100644 src/spider/client/Driver.hpp create mode 100644 src/spider/client/Exception.hpp create mode 100644 src/spider/client/Job.hpp create mode 100644 src/spider/client/TaskContext.hpp create mode 100644 src/spider/client/TaskGraph.hpp create mode 100644 src/spider/client/spider.hpp create mode 100644 src/spider/client/task.hpp create mode 100644 src/spider/client/type_utils.hpp diff --git a/src/spider/.clang-format b/src/spider/.clang-format index 910a765..06150ee 100644 --- a/src/spider/.clang-format +++ b/src/spider/.clang-format @@ -10,7 +10,7 @@ IncludeCategories: # Ex: # - Regex: "<(fmt|spdlog)" # Priority: 3 - - Regex: "^<(clp)" + - Regex: "^<(absl|boost|catch2|fmt|mariadb|msgpack|spdlog)" Priority: 3 # C system headers - Regex: "^<.+\\.h>" diff --git a/src/spider/CMakeLists.txt b/src/spider/CMakeLists.txt index 12281cc..3bc827e 100644 --- a/src/spider/CMakeLists.txt +++ b/src/spider/CMakeLists.txt @@ -28,9 +28,54 @@ target_link_libraries( ) target_link_libraries(spider_core PRIVATE fmt::fmt) +set(SPIDER_CLIENT_SHARED_SOURCES CACHE INTERNAL "spider client shared source files") + +set(SPIDER_CLIENT_SHARED_HEADERS + client/Data.hpp + client/Driver.hpp + client/task.hpp + client/TaskContext.hpp + client/TaskGraph.hpp + client/type_utils.hpp + client/Exception.hpp + CACHE INTERNAL + "spider client shared header files" +) + +add_library(spider_client_lib) +target_sources(spider_client_lib PRIVATE ${SPIDER_CLIENT_SHARED_SOURCES}) +target_sources(spider_client_lib PUBLIC ${SPIDER_CLIENT_SHARED_HEADERS}) +target_link_libraries( + spider_client_lib + PUBLIC + Boost::boost + absl::flat_hash_map +) + +set(SPIDER_CLIENT_SOURCES CACHE INTERNAL "spider client source files") + +set(SPIDER_CLIENT_HEADERS + client/spider.hpp + client/Job.hpp + CACHE INTERNAL + "spider client header files" +) + +add_library(spider_client) +target_sources(spider_client PRIVATE ${SPIDER_CLIENT_SOURCES}) +target_sources(spider_client PUBLIC ${SPIDER_CLIENT_HEADERS}) +target_link_libraries(spider_client PRIVATE spider_core) +target_link_libraries(spider_client PUBLIC spider_client_lib) +add_library(spider::spider ALIAS spider_client) + set(SPIDER_WORKER_SOURCES worker/worker.cpp CACHE INTERNAL "spider worker source files") add_executable(spider_worker) target_sources(spider_worker PRIVATE ${SPIDER_WORKER_SOURCES}) -target_link_libraries(spider_worker PRIVATE spider_core) +target_link_libraries( + spider_worker + PRIVATE + spider_core + spider_client_lib +) add_executable(spider::worker ALIAS spider_worker) diff --git a/src/spider/client/Data.hpp b/src/spider/client/Data.hpp new file mode 100644 index 0000000..ba162c0 --- /dev/null +++ b/src/spider/client/Data.hpp @@ -0,0 +1,82 @@ +#ifndef SPIDER_CLIENT_DATA_HPP +#define SPIDER_CLIENT_DATA_HPP + +#include +#include +#include +#include + +#include "../core/Serializer.hpp" + +namespace spider { +class DataImpl; + +/** + * A representation of data stored on external storage. This class allows the user to define: + * - how the data should be cleaned up (garbage collected) once it is no longer referenced. + * - the locality of the data. + * + * Example: + * @code{.cpp} + * auto disk_file_data = spider::Data::Builder() + * .set_locality({"node_address"}, true) + * .set_cleanup_func([](std::string const& path) { std::filesystem::remove(path); }) + * .build("/path/of/file"); + * @endcode + * + * @tparam T Type of the value. + */ +template +class Data { +public: + /** + * @return The stored value. + */ + auto get() -> T; + + /** + * Sets the data's locality, indicated by the nodes that contain the data. + * + * @param nodes + * @param hard Whether the data is only accessible from the given nodes (i.e., the locality is a + * hard requirement). + */ + void set_locality(std::vector const& nodes, bool hard); + + class Builder { + public: + /** + * Sets the data's locality, indicated by the nodes that contain the data. + * + * @param nodes + * @param hard Whether the data is only accessible from the given nodes (i.e., the locality + * is a hard requirement. + * @return self + */ + auto set_locality(std::vector const& nodes, bool hard) -> Builder&; + + /** + * Sets the cleanup function for the data. This function will be called when the data is no + * longer referenced. + * + * @param f + * @return self + */ + auto set_cleanup_func(std::function const& f) -> Builder&; + + /** + * Builds the data object. + * + * @param t Value of the data + * @return The built object. + * @throw spider::ConnectionException + */ + auto build(T const& t) -> Data; + }; + +private: + std::unique_ptr m_impl; +}; +} // namespace spider + +#endif // SPIDER_CLIENT_DATA_HPP diff --git a/src/spider/client/Driver.hpp b/src/spider/client/Driver.hpp new file mode 100644 index 0000000..96b1c3b --- /dev/null +++ b/src/spider/client/Driver.hpp @@ -0,0 +1,139 @@ +#ifndef SPIDER_CLIENT_DRIVER_HPP +#define SPIDER_CLIENT_DRIVER_HPP + +#include +#include +#include + +#include + +#include "../worker/FunctionManager.hpp" +#include "Job.hpp" +#include "task.hpp" +#include "TaskGraph.hpp" + +/** + * Registers a Task function with Spider + * @param func + */ +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define SPIDER_REGISTER_TASK(func) SPIDER_WORKER_REGISTER_TASK(func) + +/** + * Registers a timed Task function with Spider + * @param func + * @param timeout The time after which the task is considered a straggler, triggering Spider to + * replicate the task. TODO: Use the timeout. + */ +// NOLINTNEXTLINE(cppcoreguidelines-macro-usage) +#define SPIDER_REGISTER_TASK_TIMEOUT(func, timeout) SPIDER_WORKER_REGISTER_TASK(func) + +namespace spider { + +/** + * An interface for a client to interact with Spider and create jobs, access the kv-store, etc. + */ +class Driver { +public: + /** + * @param storage_url + * @throw spider::ConnectionException + */ + explicit Driver(std::string const& storage_url); + + /** + * @param storage_url + * @param id A caller-specified ID to associate with this driver. All jobs created by this + * driver will be associated with this ID. This may be useful if, for instance, the caller + * fails and then needs to reconnect and retrieve all previously created jobs. NOTE: It is + * undefined behaviour for two clients to concurrently use the same ID. + * @throw spider::ConnectionException + * @throw spider::DriverIdInUseException + */ + Driver(std::string const& storage_url, boost::uuids::uuid id); + + /** + * Inserts the given key-value pair into the key-value store, overwriting any existing value. + * + * @param key + * @param value + * @throw spider::ConnectionException + */ + auto kv_store_insert(std::string const& key, std::string const& value); + + /** + * Gets the value corresponding to the given key. + * + * NOTE: Callers cannot get values created by other clients, but they can get values created by + * previous `Driver` instances with the same client ID. + * + * @param key + * @return An optional containing the value if the given key exists, or `std::nullopt` + * otherwise. + * @throw spider::ConnectionException + */ + auto kv_store_get(std::string const& key) -> std::optional; + + /** + * Binds inputs to a task. Inputs can be: + * - the outputs of a task or task graph, forming dependencies between tasks. + * - any value that satisfies the `TaskIo` concept. + * + * @tparam ReturnType Return type for both the task and the resulting `TaskGraph`. + * @tparam TaskParams + * @tparam Inputs + * @tparam GraphParams + * @param task + * @param inputs Inputs to bind to `task`. If an input is a `Task` or `TaskGraph`, their + * outputs will be bound to the inputs of `task`. + * @return A `TaskGraph` of the inputs bound to `task`. + */ + template < + TaskIo ReturnType, + TaskIo... TaskParams, + RunnableOrTaskIo... Inputs, + TaskIo... GraphParams> + auto bind(TaskFunction const& task, Inputs&&... inputs) + -> TaskGraph; + + /** + * Starts running a task with the given inputs on Spider. + * + * @tparam ReturnType + * @tparam Params + * @param task + * @param inputs + * @return A job representing the running task. + * @throw spider::ConnectionException + */ + template + auto + start(TaskFunction const& task, Params&&... inputs) -> Job; + + /** + * Starts running a task graph with the given inputs on Spider. + * + * @tparam ReturnType + * @tparam Params + * @param graph + * @param inputs + * @return A job representing the running task graph. + * @throw spider::ConnectionException + */ + template + auto + start(TaskGraph const& graph, Params&&... inputs) -> Job; + + /** + * Gets all scheduled and running jobs started by drivers with the current client's ID. + * + * NOTE: This method will not return jobs that have finished. + * + * @return IDs of the jobs. + * @throw spider::ConnectionException + */ + auto get_jobs() -> std::vector; +}; +} // namespace spider + +#endif // SPIDER_CLIENT_DRIVER_HPP diff --git a/src/spider/client/Exception.hpp b/src/spider/client/Exception.hpp new file mode 100644 index 0000000..259af90 --- /dev/null +++ b/src/spider/client/Exception.hpp @@ -0,0 +1,37 @@ +#ifndef SPIDER_CLIENT_EXCEPTION_HPP +#define SPIDER_CLIENT_EXCEPTION_HPP + +#include +#include + +#include +#include +#include + +namespace spider { +class ConnectionException final : public std::exception { +public: + explicit ConnectionException(std::string const& addr) + : m_message(fmt::format("Cannot connect to storage {}.", addr)) {} + + [[nodiscard]] auto what() const noexcept -> char const* override { return m_message.c_str(); } + +private: + std::string m_message; +}; + +class DriverIdInUseException final : public std::exception { +public: + explicit DriverIdInUseException(boost::uuids::uuid id) + : m_message( + fmt::format("Driver ID {} is currently in use.", boost::uuids::to_string(id)) + ) {} + + [[nodiscard]] auto what() const noexcept -> char const* override { return m_message.c_str(); } + +private: + std::string m_message; +}; +} // namespace spider + +#endif // SPIDER_CLIENT_EXCEPTION_HPP diff --git a/src/spider/client/Job.hpp b/src/spider/client/Job.hpp new file mode 100644 index 0000000..79e3244 --- /dev/null +++ b/src/spider/client/Job.hpp @@ -0,0 +1,72 @@ +#ifndef SPIDER_CLIENT_JOB_HPP +#define SPIDER_CLIENT_JOB_HPP + +#include +#include +#include + +#include "task.hpp" + +namespace spider { +// TODO: Use std::expected or Boost's outcome so that the user can get the result of the job in one +// call rather than the current error-prone approach which requires that the user check the job's +// status and then call the relevant method. + +enum class JobStatus : uint8_t { + Running, + Succeeded, + Failed, + Cancelled, +}; + +/** + * A running task graph. + * + * @tparam ReturnType + */ +template +class Job { +public: + /** + * Waits for the job to complete. + * + * @throw spider::ConnectionException + */ + auto wait_complete(); + + /** + * Cancels the job and waits for the job's tasks to be cancelled. + * + * @throw spider::ConnectionException + */ + auto cancel(); + + /** + * @return Status of the job. + * @throw spider::ConnectionException + */ + auto get_status() -> JobStatus; + + /** + * NOTE: It is undefined behavior to call this method for a job that is not in the `Succeeded` + * state. + * + * @return Result of the job. + * @throw spider::ConnectionException + */ + auto get_result() -> ReturnType; + + /** + * NOTE: It is undefined behavior to call this method for a job that is not in the `Failed` + * state. + * + * @return A pair: + * - the name of the task function that failed. + * - the error message sent from the task through `TaskContext::abort` or from Spider. + * @throw spider::ConnectionException + */ + auto get_error() -> std::pair; +}; +} // namespace spider + +#endif // SPIDER_CLIENT_JOB_HPP diff --git a/src/spider/client/TaskContext.hpp b/src/spider/client/TaskContext.hpp new file mode 100644 index 0000000..db83636 --- /dev/null +++ b/src/spider/client/TaskContext.hpp @@ -0,0 +1,117 @@ +#ifndef SPIDER_CLIENT_TASKCONTEXT_HPP +#define SPIDER_CLIENT_TASKCONTEXT_HPP + +#include +#include +#include + +#include + +#include "Job.hpp" +#include "task.hpp" +#include "TaskGraph.hpp" + +namespace spider { +/** + * TaskContext provides a task with all Spider functionalities, e.g. getting task instance id, + * accessing data storage, creating and waiting for new jobs, etc. + * TaskContext is provided as first argument to a task. + */ +class TaskContext { +public: + /** + * Aborts the current task and job. This function never returns. + * + * @param message The reason for the abort. + * @throw spider::ConnectionException + */ + [[noreturn]] auto abort(std::string const& message) -> void; + + /** + * @return ID of the current running task instance. + */ + [[nodiscard]] auto get_id() const -> boost::uuids::uuid; + + /** + * Inserts the given key-value pair into the key-value store, overwriting any existing value. + * + * @param key + * @param value + * @throw spider::ConnectionException + */ + auto kv_store_insert(std::string const& key, std::string const& value) -> void; + + /** + * Gets the value corresponding to the given key. + * + * NOTE: Callers cannot get values created by other tasks, but they can get values created by + * previous instances of the same task. + * + * @param key + * @return An optional containing the value if the given key exists, or `std::nullopt` + * otherwise. + * @throw spider::ConnectionException + */ + auto kv_store_get(std::string const& key) -> std::optional; + + /** + * Binds inputs to a task. Inputs can be: + * - the outputs of a task or task graph, forming dependencies between tasks. + * - any value that satisfies the `TaskIo` concept. + * + * @tparam ReturnType Return type for both the task and the resulting `TaskGraph`. + * @tparam TaskParams + * @tparam Inputs + * @tparam GraphParams + * @param task + * @param inputs Inputs to bind to `task`. If an input is a `Task` or `TaskGraph`, their + * outputs will be bound to the inputs of `task`. + * @return A `TaskGraph` of the inputs bound to `task`. + */ + template < + TaskIo ReturnType, + TaskIo... TaskParams, + RunnableOrTaskIo... Inputs, + TaskIo... GraphParams> + auto bind(TaskFunction const& task, Inputs&&... inputs) + -> TaskGraph; + + /** + * Starts running a task with the given inputs on Spider. + * + * @tparam ReturnType + * @tparam Params + * @param task + * @param inputs + * @return A job representing the running task. + * @throw spider::ConnectionException + */ + template + auto + start(TaskFunction const& task, Params&&... inputs) -> Job; + + /** + * Starts running a task graph with the given inputs on Spider. + * + * @tparam ReturnType + * @tparam Params + * @param graph + * @param inputs + * @return A job representing the running task graph. + * @throw spider::ConnectionException + */ + template + auto + start(TaskGraph const& graph, Params&&... inputs) -> Job; + + /** + * Gets all jobs started by this task. + * + * @return IDs of the jobs. + * @throw spider::ConnectionException + */ + auto get_jobs() -> std::vector; +}; +} // namespace spider + +#endif // SPIDER_CLIENT_TASKCONTEXT_HPP diff --git a/src/spider/client/TaskGraph.hpp b/src/spider/client/TaskGraph.hpp new file mode 100644 index 0000000..4da2c0a --- /dev/null +++ b/src/spider/client/TaskGraph.hpp @@ -0,0 +1,17 @@ +#ifndef SPIDER_CLIENT_TASKGRAPH_HPP +#define SPIDER_CLIENT_TASKGRAPH_HPP + +#include "task.hpp" + +namespace spider { +/** + * A TaskGraph represents a directed acyclic graph (DAG) of tasks. + * + * @tparam ReturnType + * @tparam Params + */ +template +class TaskGraph {}; +} // namespace spider + +#endif // SPIDER_CLIENT_TASKGRAPH_HPP diff --git a/src/spider/client/spider.hpp b/src/spider/client/spider.hpp new file mode 100644 index 0000000..67fd005 --- /dev/null +++ b/src/spider/client/spider.hpp @@ -0,0 +1,13 @@ +#ifndef SPIDER_CLIENT_SPIDER_HPP +#define SPIDER_CLIENT_SPIDER_HPP + +// IWYU pragma: begin_exports +#include "Data.hpp" +#include "Driver.hpp" +#include "Job.hpp" +#include "TaskContext.hpp" +#include "TaskGraph.hpp" + +// IWYU pragma: end_exports + +#endif // SPIDER_CLIENT_SPIDER_HPP diff --git a/src/spider/client/task.hpp b/src/spider/client/task.hpp new file mode 100644 index 0000000..0cecede --- /dev/null +++ b/src/spider/client/task.hpp @@ -0,0 +1,50 @@ +#ifndef SPIDER_CLIENT_TASK_HPP +#define SPIDER_CLIENT_TASK_HPP + +#include + +#include "../core/Serializer.hpp" +#include "Data.hpp" +#include "type_utils.hpp" + +namespace spider { + +/** + * Concept that represents the input to or output from a Task. + * + * @tparam T + */ +template +concept TaskIo = Serializable || cIsSpecializationV; + +// Forward declare `TaskContext` since `TaskFunction` takes `TaskContext` as a param, and +// `TaskContext` uses `TaskFunction` as a param in its methods. +class TaskContext; + +/** + * A function that can be run as a task on Spider. + * + * @tparam ReturnType + * @tparam TaskParams + */ +template +using TaskFunction = std::function; + +// Forward declare `TaskGraph` since `Runnable` takes `TaskGraph` as a param, and `TaskGraph` uses +// `TaskIo` defined in this header as its template params. +template +class TaskGraph; + +/** + * Concept for an object that's runnable on Spider. + * + * @tparam T + */ +template +concept Runnable = cIsSpecializationV || cIsSpecializationV; + +template +concept RunnableOrTaskIo = Runnable || TaskIo; +} // namespace spider + +#endif // SPIDER_CLIENT_TASK_HPP diff --git a/src/spider/client/type_utils.hpp b/src/spider/client/type_utils.hpp new file mode 100644 index 0000000..1a1a062 --- /dev/null +++ b/src/spider/client/type_utils.hpp @@ -0,0 +1,31 @@ +#ifndef SPIDER_CLIENT_TYPE_UTILS_HPP +#define SPIDER_CLIENT_TYPE_UTILS_HPP + +#include + +namespace spider { +// The template and partial specialization below check whether a given type is a specialization of +// a given type. +/** + * Template to check if a given type is specialization of a given template type. + * + * NOTE: This inherits from `std::false_type` so that by default, `Type` is not considered + * a specialization of `TemplateType`. The partial specialization of `IsSpecialization` below + * defines the case where `Type` is considered a specialization of `TemplateType`. + * + * @tparam Type + * @tparam template_type + */ +template class template_type> +struct IsSpecialization : public std::false_type {}; + +// Specialization of `IsSpecialization` that inherits from `std::true_type` only when the first +// template argument is a specialization (i.e., the same type with template parameters) of the +// second template argument. +template