From 0bd9ee288b02a007a6305383d2cd43f3e747f039 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 17 Dec 2024 08:50:48 -0500 Subject: [PATCH 01/12] docs: Add quick-start guide. --- docs/quick-start.md | 268 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 docs/quick-start.md diff --git a/docs/quick-start.md b/docs/quick-start.md new file mode 100644 index 0000000..d3ec590 --- /dev/null +++ b/docs/quick-start.md @@ -0,0 +1,268 @@ +# Quick start + +Spider is a distributed system for executing user-defined tasks. It is designed to achieve low +latency, high throughput, and robust fault tolerance. + +The guide below briefly describes how to get started with running a task on Spider. + +To get started with Spider, you’ll need to: + +* Write a task +* Build the task into a shared library +* Write a client to manage the task +* Build the client +* Set up a Spider cluster +* Run the client + +# Requirements + +To run through the guide below, you'll need: + +* CMake +* GCC 10+ or Clang 7+ +* [Docker] + * If you're not running as root, ensure `docker` can be run + [without superuser privileges][docker-non-root]. + +# Writing a task + +In Spider, a task is C++ function that satisfies the following conditions: +* It is a non-member function. +* It takes one or more parameters: + * the first parameter must be a `TaskContext`. + * all other parameters must have types that conform to the `Serializable` or `Data` interfaces. +* It returns a value that conforms to the `Serializable` or `Data` interfaces. + +> [!NOTE] You don’t immediately need to understand the TaskContext, Serializable, or Data types as +> we'll explain them in later sections. + +For example, the task below computes and returns the sum of two integers. 
+ +> [!NOTE] The task is split into a header file and an implementation file so that it can be loaded +> as a library in the worker, as we'll see in later sections. + +_tasks.hpp_ + +```c++ +#include + +// Task function prototype +/** + * @param context + * @param x + * @param y + * @return The sum of x and y. + */ +auto sum(spider::TaskContext& context, int x, int y) -> int; + +``` + +_tasks.cpp_ + +```c++ +#include "tasks.hpp" + +#include + +// Task function implementation +auto sum(spider::TaskContext& context, int x, int y) -> int { + return x + y; +} + +// Register the task with Spider +SPIDER_REGISTER_TASK(sum); + +``` + +The integer parameters and return value are `Serializable` values. +The `SPIDER_REGISTER_TASK` macro at the bottom of `tasks.cpp` is how we inform Spider that a +function should be treated as a task. + +# Building the task into a shared library + +In order for Spider to run a task, the task needs to be compiled into a shared library that Spider +can load. + +TODO: Instructions and an example showing how to compile tasks into a shared library. + +# Writing a client to manage the task + +To make Spider run a task, we first need to write a client application. Generally, a client: + +1. connects to Spider; +2. submits the task for execution; +3. waits for its completion—whether it succeeds or fails; +4. and then handles the result. + +For example, the client below runs the `sum` task from the previous section and verifies its result. + +_client.cpp_ + +```c++ +#include +#include + +#include + +#include "tasks.hpp" + +auto main(int argc, char const* argv[]) -> int { + // Parse the storage backend URL from the command line arguments + if (argc < 2) { + std::cerr << "Usage: ./client <storage-backend-url>" << '\n'; + return 1; + } + std::string storage_url{argv[1]}; + if (storage_url.empty()) { + std::cerr << "storage-backend-url cannot be empty."
<< '\n'; + } + + // Create a driver that connects to the Spider cluster + spider::Driver driver{storage_url}; + + // Submit the task for execution + spider::Job job = driver.start(sum, 2, 3); + + // Wait for the job to complete + job.wait_complete(); + + // Handle the job's success/failure + auto job_status = job.get_status(); + switch (job_status) { + case JobStatus::Succeeded: { + auto result = job_status.get_result(); + if (result == job_status.get_result()) { + return 0; + } else { + std::cerr << "`sum` returned unexpected result. Expected: 5. Actual: " << result + << '\n'. + return 1; + } + } + case JobStatus::Failed: + std::pair error_and_fn_name = job.get_error(); + std::cerr << "Job failed in function " << error_and_fn_name.second << " - " + << error_and_fn_name.first << '\n'; + return 1; + default: + std::cerr << "Job is in unexpected state - " << job_status << '\n'; + return 1; + } +} + +``` + +When we submit a task to Spider, Spider returns a `Job` , which represents a scheduled, running, or +completed task (or `TaskGraph`) in a Spider cluster. + +> [!NOTE] `Job`s and `TaskGraph`s will be explained in another guide. + +# Building the client + +The client can be compiled like any normal C++ application except that we need to link it to the +Spider client library. + +TODO: Instructions and an example for how to compile the client application. + +# Setting up a Spider cluster + +Before we can run the client, we need to start a Spider cluster. The simplest Spider cluster +consists of: + +* a storage backend; +* a scheduler instance; +* and a worker instance. + +## Setting up a storage backend + +Spider currently supports using MySQL or MariaDB as a storage backend. 
In this guide, we'll start +MariaDB in a Docker container: + +```shell +docker run \ + --detach \ + --rm \ + --name spider-storage \ + --env MARIADB_USER=spider \ + --env MARIADB_PASSWORD=password \ + --env MARIADB_DATABASE=spider-storage \ + --env MARIADB_ALLOW_EMPTY_ROOT_PASSWORD=true \ + --publish 3306:3306 mariadb:latest +``` + +> [!WARNING] When the container above is stopped, the database will be deleted. In production, you +> should set up a database instance with some form of data persistence. + +Alternatively, if you have an existing MySQL/MariaDB instance, you can use that as well. Simply +create a database and authorize a user to access it. + +## Setting up the scheduler + +To build the scheduler, run the following from the root of the project: + +```bash +cmake -S . -B build +cmake --build build --parallel $(nproc) --target spider_scheduler +``` + +To start the scheduler, run: + +```bash +build/src/spider/spider_schdeuler \ + --storage_url \ + "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ + --port 6000 +``` + +NOTE: + +* If you used a different set of arguments to set up the storage backend, ensure you update the + `storage_url` argument ihn the command. +* If the scheduler fails to bind to port `6000`, change the port in the command and try again. + +## Setting up a worker + +To build the worker, run the following from the root of the project: + +```bash +cmake -S . -B build +cmake --build build --parallel $(nproc) --target spider_worker +``` + +To start a worker, run: + +```bash +build/src/spider/spider_worker \ + --storage_url \ + "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ + --port 6000 +``` + +NOTE: + +If you used a different set of arguments to set up the storage backend, ensure you update the +`storage_url` argument in the command. + +> [!TIP] You can start multiple workers to increase the number of concurrent tasks that can be run +> on the cluster. 
+ +# Running the client + +To run the client: + +```bash +./client "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" +``` + +NOTE: + +If you used a different set of arguments to set up the storage backend, ensure you update the +storage backend URL in the command. + +# Next steps + +In future guides, we'll explain how to write more complex tasks as well as how to leverage Spider's +support for fault tolerance. + +[Docker]: https://docs.docker.com/engine/install/ +[docker-non-root]: https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user From 3ef6f42590a17b9198a51b934537c664403b8579 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 17 Dec 2024 08:54:56 -0500 Subject: [PATCH 02/12] bash -> shell; Add newline after GitHub alert type. --- docs/quick-start.md | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index d3ec590..b89eb40 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -33,13 +33,15 @@ In Spider, a task is C++ function that satisfies the following conditions: * all other parameters must have types that conform to the `Serializable` or `Data` interfaces. * It returns a value that conforms to the `Serializable` or `Data` interfaces. -> [!NOTE] You don’t immediately need to understand the TaskContext, Serializable, or Data types as -> we'll explain them in later sections. +> [!NOTE] +> You don’t immediately need to understand the TaskContext, Serializable, or Data types as we'll +> explain them in later sections. For example, the task below computes and returns the sum of two integers. -> [!NOTE] The task is split into a header file and an implementation file so that it can be loaded -> as a library in the worker, as we'll see in later sections. 
+> [!NOTE] +> The task is split into a header file and an implementation file so that it can be loaded as a +> library in the worker, as we'll see in later sections. _tasks.hpp_ @@ -155,7 +157,8 @@ auto main(int argc, char const* argv[]) -> int { When we submit a task to Spider, Spider returns a `Job` , which represents a scheduled, running, or completed task (or `TaskGraph`) in a Spider cluster. -> [!NOTE] `Job`s and `TaskGraph`s will be explained in another guide. +> [!NOTE] +> `Job`s and `TaskGraph`s will be explained in another guide. # Building the client @@ -190,8 +193,9 @@ docker run \ --publish 3306:3306 mariadb:latest ``` -> [!WARNING] When the container above is stopped, the database will be deleted. In production, you -> should set up a database instance with some form of data persistence. +> [!WARNING] +> When the container above is stopped, the database will be deleted. In production, you should set +> up a database instance with some form of data persistence. Alternatively, if you have an existing MySQL/MariaDB instance, you can use that as well. Simply create a database and authorize a user to access it. @@ -200,14 +204,14 @@ create a database and authorize a user to access it. To build the scheduler, run the following from the root of the project: -```bash +```shell cmake -S . -B build cmake --build build --parallel $(nproc) --target spider_scheduler ``` To start the scheduler, run: -```bash +```shell build/src/spider/spider_schdeuler \ --storage_url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ @@ -224,14 +228,14 @@ NOTE: To build the worker, run the following from the root of the project: -```bash +```shell cmake -S . 
-B build cmake --build build --parallel $(nproc) --target spider_worker ``` To start a worker, run: -```bash +```shell build/src/spider/spider_worker \ --storage_url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ @@ -243,14 +247,15 @@ NOTE: If you used a different set of arguments to set up the storage backend, ensure you update the `storage_url` argument in the command. -> [!TIP] You can start multiple workers to increase the number of concurrent tasks that can be run -> on the cluster. +> [!TIP] +> You can start multiple workers to increase the number of concurrent tasks that can be run on the +> cluster. # Running the client To run the client: -```bash +```shell ./client "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" ``` From afd3454daf6cc23947f8db72690976ecc941ee53 Mon Sep 17 00:00:00 2001 From: Sitao Wang Date: Tue, 17 Dec 2024 16:02:21 -0500 Subject: [PATCH 03/12] Add build instructions for client task library and executable --- docs/quick-start.md | 46 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index b89eb40..f71504f 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -85,7 +85,28 @@ function should be treated as a task. In order for Spider to run a task, the task needs to be compiled into a shared library that Spider can load. -TODO: Instructions and an example showing how to compile tasks into a shared library. 
+Suppose that spider is added in subdirectory `spider`, we can set up the following `CMakeLists.txt` +to build the task library: + +```cmake +cmake_minimum_required(VERSION 3.22.1) +project(spider_example) + +# Add spider library +add_subdirectory(spider) + +# Add the task library +add_library(tasks SHARED tasks.cpp tasks.hpp) +# Link the spider library to the task library +target_link_libraries(tasks PRIVATE spider::spider) +``` + +To build the shared library, run the following from the root of the project: + +```shell +cmake -S . -B build +cmake --build build --parallel $(nproc) --target tasks +``` # Writing a client to manage the task @@ -165,7 +186,28 @@ completed task (or `TaskGraph`) in a Spider cluster. The client can be compiled like any normal C++ application except that we need to link it to the Spider client library. -TODO: Instructions and an example for how to compile the client application. +Again, suppose that spider is added in subdirectory `spider`, we can set up the following +`CMakeLists.txt` to build the client program: + +```cmake +cmake_minimum_required(VERSION 3.22.1) +project(spider_example) + +# Add spider library +add_subdirectory(spider) + +# Add the client +add_executable(client client.cpp) +# Link the spider library to the client +target_link_libraries(client PRIVATE spider::spider) +``` + +To build the client executable, run the following from the root of the spider project: + +```shell +cmake -S . -B build +cmake --build build --parallel $(nproc) --target client +``` # Setting up a Spider cluster From 7cad08e7a01291351bbb551f6f6d40f21eb08bdc Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 07:16:22 -0500 Subject: [PATCH 04/12] Replace tab indent. 
--- docs/quick-start.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index f71504f..f3515d8 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -68,7 +68,7 @@ _tasks.cpp_ // Task function implementation auto sum(spider::TaskContext& context, int x, int y) -> int { - return x + y; + return x + y; } // Register the task with Spider From 2cb34a8f65a701c3512759867971c15bba0cbbfc Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 07:26:23 -0500 Subject: [PATCH 05/12] Address the Rabbit's comments. --- docs/quick-start.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index f3515d8..855aa88 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -26,7 +26,7 @@ To run through the guide below, you'll need: # Writing a task -In Spider, a task is C++ function that satisfies the following conditions: +In Spider, a task is a C++ function that satisfies the following conditions: * It is a non-member function. * It takes one or more parameters: * the first parameter must be a `TaskContext`. @@ -144,7 +144,9 @@ auto main(int argc, char const* argv[]) -> int { spider::Driver driver{storage_url}; // Submit the task for execution - spider::Job job = driver.start(sum, 2, 3); + int x = 2; + int y = 3; + spider::Job job = driver.start(sum, x, y); // Wait for the job to complete job.wait_complete(); @@ -154,11 +156,12 @@ auto main(int argc, char const* argv[]) -> int { switch (job_status) { case JobStatus::Succeeded: { auto result = job_status.get_result(); - if (result == job_status.get_result()) { + int expected = x + y; + if (expected == job_status.get_result()) { return 0; } else { - std::cerr << "`sum` returned unexpected result. Expected: 5. Actual: " << result - << '\n'. + std::cerr << "`sum` returned unexpected result. Expected: " << expected + << ". 
Actual: " << result << '\n'; return 1; } } @@ -254,7 +257,7 @@ cmake --build build --parallel $(nproc) --target spider_scheduler To start the scheduler, run: ```shell -build/src/spider/spider_schdeuler \ +build/src/spider/spider_scheduler \ --storage_url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --port 6000 From 8bb6b55fa7c00de0d74e3ed4ff59702dcd3cfffe Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 07:37:38 -0500 Subject: [PATCH 06/12] Add task and client build instructions. --- docs/quick-start.md | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 855aa88..8ddbb24 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -83,10 +83,8 @@ function should be treated as a task. # Building the task into a shared library In order for Spider to run a task, the task needs to be compiled into a shared library that Spider -can load. - -Suppose that spider is added in subdirectory `spider`, we can set up the following `CMakeLists.txt` -to build the task library: +can load. To do so, first, place `tasks.hpp` and `task.cpp` in a directory along with the Spider +directory. Then add the following `CMakeLists.txt` to the same directory. ```cmake cmake_minimum_required(VERSION 3.22.1) @@ -97,6 +95,7 @@ add_subdirectory(spider) # Add the task library add_library(tasks SHARED tasks.cpp tasks.hpp) + # Link the spider library to the task library target_link_libraries(tasks PRIVATE spider::spider) ``` @@ -187,20 +186,13 @@ completed task (or `TaskGraph`) in a Spider cluster. # Building the client The client can be compiled like any normal C++ application except that we need to link it to the -Spider client library. - -Again, suppose that spider is added in subdirectory `spider`, we can set up the following -`CMakeLists.txt` to build the client program: +Spider client library. 
To do so, add `client.cpp` to the directory that contains the task source +files. Then add the following to the `CMakeLists.txt`: ```cmake -cmake_minimum_required(VERSION 3.22.1) -project(spider_example) - -# Add spider library -add_subdirectory(spider) - # Add the client add_executable(client client.cpp) + # Link the spider library to the client target_link_libraries(client PRIVATE spider::spider) ``` From e4d122366759e668abd3c1f2fde78bc68ebcb170 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 07:38:12 -0500 Subject: [PATCH 07/12] Address the Rabbit's comments. --- docs/quick-start.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 8ddbb24..646c52d 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -156,7 +156,7 @@ auto main(int argc, char const* argv[]) -> int { case JobStatus::Succeeded: { auto result = job_status.get_result(); int expected = x + y; - if (expected == job_status.get_result()) { + if (expected == result) { return 0; } else { std::cerr << "`sum` returned unexpected result. Expected: " << expected @@ -258,7 +258,7 @@ build/src/spider/spider_scheduler \ NOTE: * If you used a different set of arguments to set up the storage backend, ensure you update the - `storage_url` argument ihn the command. + `storage_url` argument in the command. * If the scheduler fails to bind to port `6000`, change the port in the command and try again. ## Setting up a worker From 0b9c20c0db09fadaadbfe392b572c425f8b1075c Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:24:07 -0500 Subject: [PATCH 08/12] Add CMake and Docker version requirements. 
--- docs/quick-start.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 646c52d..9ed1130 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -18,9 +18,9 @@ To get started with Spider, you’ll need to: To run through the guide below, you'll need: -* CMake +* CMake 3.22.1+ * GCC 10+ or Clang 7+ -* [Docker] +* [Docker] 20.10+ * If you're not running as root, ensure `docker` can be run [without superuser privileges][docker-non-root]. From bd9f3f71140c5074569b895d290efe00e5bbfe6c Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 19:35:28 -0500 Subject: [PATCH 09/12] Touch-ups. --- docs/quick-start.md | 77 +++++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 9ed1130..146f1f1 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -3,9 +3,8 @@ Spider is a distributed system for executing user-defined tasks. It is designed to achieve low latency, high throughput, and robust fault tolerance. -The guide below briefly describes how to get started with running a task on Spider. - -To get started with Spider, you’ll need to: +The guide below briefly describes how to get started with running a task on Spider. At a high-level, +you'll need to: * Write a task * Build the task into a shared library @@ -14,9 +13,13 @@ To get started with Spider, you’ll need to: * Set up a Spider cluster * Run the client +> [!NOTE] +> Each code example below is prefixed with a suggested file path that we then use when compiling. +> If you choose different file paths, ensure you update the compilation commands to match. 
+ # Requirements -To run through the guide below, you'll need: +In the guide below, you'll need: * CMake 3.22.1+ * GCC 10+ or Clang 7+ @@ -27,15 +30,16 @@ To run through the guide below, you'll need: # Writing a task In Spider, a task is a C++ function that satisfies the following conditions: + * It is a non-member function. * It takes one or more parameters: - * the first parameter must be a `TaskContext`. - * all other parameters must have types that conform to the `Serializable` or `Data` interfaces. + * The first parameter must be a `TaskContext`. + * All other parameters must have types that conform to the `Serializable` or `Data` interfaces. * It returns a value that conforms to the `Serializable` or `Data` interfaces. > [!NOTE] -> You don’t immediately need to understand the TaskContext, Serializable, or Data types as we'll -> explain them in later sections. +> You don't immediately need to understand the TaskContext, Serializable, or Data types as we'll +> explain them in other guides. For example, the task below computes and returns the sum of two integers. @@ -43,7 +47,7 @@ For example, the task below computes and returns the sum of two integers. > The task is split into a header file and an implementation file so that it can be loaded as a > library in the worker, as we'll see in later sections. -_tasks.hpp_ +`src/tasks.hpp`: ```c++ #include @@ -59,7 +63,7 @@ auto sum(spider::TaskContext& context, int x, int y) -> int; ``` -_tasks.cpp_ +`src/tasks.cpp`: ```c++ #include "tasks.hpp" @@ -77,26 +81,35 @@ SPIDER_REGISTER_TASK(sum); ``` The integer parameters and return value are `Serializable` values. -The `SPIDER_REGISTER_TASK` macro at the bottom of `tasks.cpp` is how we inform Spider that a +The `SPIDER_REGISTER_TASK` macro at the bottom of `src/tasks.cpp` is how we inform Spider that a function should be treated as a task. 
# Building the task into a shared library In order for Spider to run a task, the task needs to be compiled into a shared library that Spider -can load. To do so, first, place `tasks.hpp` and `task.cpp` in a directory along with the Spider -directory. Then add the following `CMakeLists.txt` to the same directory. +can load. To do so, first, copy the Spider project directory into the current directory to create +the following directory structure: + +* `spider/` +* `src/` + * `tasks.cpp` + * `tasks.hpp` + +Then add the following `CMakeLists.txt` to the same directory. + +`CMakeLists.txt`: ```cmake cmake_minimum_required(VERSION 3.22.1) project(spider_example) -# Add spider library +# Add the Spider library add_subdirectory(spider) # Add the task library -add_library(tasks SHARED tasks.cpp tasks.hpp) +add_library(tasks SHARED src/tasks.cpp src/tasks.hpp) -# Link the spider library to the task library +# Link the Spider library to the task library target_link_libraries(tasks PRIVATE spider::spider) ``` @@ -118,7 +131,7 @@ To make Spider to run a task, we first need to write a client application. Gener For example, the client below runs the `sum` task from the previous section and verifies its result. -_client.cpp_ +`src/client.cpp`: ```c++ #include @@ -177,7 +190,7 @@ auto main(int argc, char const* argv[]) -> int { ``` -When we submit a task to Spider, Spider returns a `Job` , which represents a scheduled, running, or +When we submit a task to Spider, Spider returns a `Job`, which represents a scheduled, running, or completed task (or `TaskGraph`) in a Spider cluster. > [!NOTE] @@ -186,18 +199,17 @@ completed task (or `TaskGraph`) in a Spider cluster. # Building the client The client can be compiled like any normal C++ application except that we need to link it to the -Spider client library. To do so, add `client.cpp` to the directory that contains the task source -files. Then add the following to the `CMakeLists.txt`: +Spider client library.
To do so, add the following to `CMakeLists.txt`: ```cmake # Add the client -add_executable(client client.cpp) +add_executable(client src/client.cpp) # Link the spider library to the client target_link_libraries(client PRIVATE spider::spider) ``` -To build the client executable, run the following from the root of the spider project: +To build the client executable, run: ```shell cmake -S . -B build @@ -234,22 +246,25 @@ docker run \ > When the container above is stopped, the database will be deleted. In production, you should set > up a database instance with some form of data persistence. +> [!WARNING] +> The container above is using hardcoded default credentials that shouldn't be used in production. + Alternatively, if you have an existing MySQL/MariaDB instance, you can use that as well. Simply create a database and authorize a user to access it. ## Setting up the scheduler -To build the scheduler, run the following from the root of the project: +To build the scheduler, run: ```shell -cmake -S . -B build -cmake --build build --parallel $(nproc) --target spider_scheduler +cmake -S spider -B spider/build +cmake --build spider/build --parallel $(nproc) --target spider_scheduler ``` To start the scheduler, run: ```shell -build/src/spider/spider_scheduler \ +spider/build/src/spider/spider_scheduler \ --storage_url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --port 6000 @@ -263,17 +278,17 @@ NOTE: ## Setting up a worker -To build the worker, run the following from the root of the project: +To build the worker, run: ```shell -cmake -S . 
-B build -cmake --build build --parallel $(nproc) --target spider_worker +cmake -S spider -B spider/build +cmake --build spider/build --parallel $(nproc) --target spider_worker ``` To start a worker, run: ```shell -build/src/spider/spider_worker \ +spider/build/src/spider/spider_worker \ --storage_url \ "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" \ --port 6000 @@ -293,7 +308,7 @@ If you used a different set of arguments to set up the storage backend, ensure y To run the client: ```shell -./client "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" +build/client "jdbc:mariadb://localhost:3306/spider-storage?user=spider&password=password" ``` NOTE: From f33d8c8d435e4460741f27c325388d811127a133 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 19:39:36 -0500 Subject: [PATCH 10/12] Add some commas suggested by the Rabbit. --- docs/quick-start.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 146f1f1..5ac85d7 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -198,7 +198,7 @@ completed task (or `TaskGraph`) in a Spider cluster. # Building the client -The client can be compiled like any normal C++ application except that we need to link it to the +The client can be compiled like any normal C++ application, except that we need to link it to the Spider client library. To do so, add the following to `CMakeLists.txt`: ```cmake @@ -318,7 +318,7 @@ storage backend URL in the command. # Next steps -In future guides, we'll explain how to write more complex tasks as well as how to leverage Spider's +In future guides, we'll explain how to write more complex tasks, as well as how to leverage Spider's support for fault tolerance.
[Docker]: https://docs.docker.com/engine/install/ From a4bcd0cbfd26d69c0bae731a8c8569d20482a344 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 19:45:43 -0500 Subject: [PATCH 11/12] Fix errors in the client's code. --- docs/quick-start.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 5ac85d7..12dbd47 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -150,6 +150,7 @@ auto main(int argc, char const* argv[]) -> int { std::string storage_url{argv[1]}; if (storage_url.empty()) { std::cerr << "storage-backend-url cannot be empty." << '\n'; + return 1; } // Create a driver that connects to the Spider cluster @@ -170,18 +171,19 @@ auto main(int argc, char const* argv[]) -> int { auto result = job_status.get_result(); int expected = x + y; if (expected == result) { - return 0; - } else { - std::cerr << "`sum` returned unexpected result. Expected: " << expected - << ". Actual: " << result << '\n'; + return 0; + } else { + std::cerr << "`sum` returned unexpected result. Expected: " << expected + << ". Actual: " << result << '\n'; return 1; } } - case JobStatus::Failed: + case JobStatus::Failed: { std::pair error_and_fn_name = job.get_error(); std::cerr << "Job failed in function " << error_and_fn_name.second << " - " << error_and_fn_name.first << '\n'; return 1; + } default: std::cerr << "Job is in unexpected state - " << job_status << '\n'; return 1; From 3a9ce6794d031b96828cd037080300f4bb95e011 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Wed, 18 Dec 2024 20:12:01 -0500 Subject: [PATCH 12/12] Remove blank lines in code blocks. 
--- docs/quick-start.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index 12dbd47..0836ab3 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -60,7 +60,6 @@ For example, the task below computes and returns the sum of two integers. * @return The sum of x and y. */ auto sum(spider::TaskContext& context, int x, int y) -> int; - ``` `src/tasks.cpp`: @@ -77,7 +76,6 @@ auto sum(spider::TaskContext& context, int x, int y) -> int { // Register the task with Spider SPIDER_REGISTER_TASK(sum); - ``` The integer parameters and return value are `Serializable` values. @@ -189,7 +187,6 @@ auto main(int argc, char const* argv[]) -> int { return 1; } } - ``` When we submit a task to Spider, Spider returns a `Job`, which represents a scheduled, running, or