diff --git a/Makefile b/Makefile index 4c4a495..9befe64 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ CC = clang++ PERFORMANCE_PROFILE_DIR = profiles -CPPFLAGS = -Werror -I include -fopencilk -std=c++2a -pthread +CPPFLAGS = -Werror -I include -fopencilk -std=c++2a -pthread LDFLAGS = -L$(CURDIR)/include -lstdc++ -lm -fopencilk ifeq ($(DEBUG), 1) diff --git a/activation_functions.cpp b/activation_functions.cpp index 9115594..7ec89ff 100644 --- a/activation_functions.cpp +++ b/activation_functions.cpp @@ -5,6 +5,7 @@ #include "matrix.h" #include "m_algorithms.h" #include "matrix_printer.h" +#include "matrix_benchmark.h" #include "config.h" @@ -15,21 +16,14 @@ std::unique_ptr NeuralNetwork::ActivationFunctions::ReLU throw std::invalid_argument("Matrix has no data (pointing to null)."); } + Matrix::Operations::Timer relu( + std::make_unique()); - auto f = [](std::unique_ptr input) { - std::unique_ptr output = std::make_unique( - Matrix::Rows(input->num_rows()), - Matrix::Columns(input->num_cols()) - ); - std::replace_copy_if(input->scanStart(), input->scanEnd(), output->scanStart(), - [](float z){ return z < 0;}, 0); + // Matrix::Operations::Unary::ReLU relu; - return output; - }; - - std::unique_ptr output = f(std::move(input)); + std::unique_ptr output = relu(std::move(input)); #if DEBUG Matrix::Printer m_printer; diff --git a/include/m_algorithms.h b/include/m_algorithms.h index 55a6afa..ae5fd81 100644 --- a/include/m_algorithms.h +++ b/include/m_algorithms.h @@ -3,6 +3,7 @@ #include #include +#include #include "matrix.h" @@ -12,151 +13,198 @@ namespace Matrix { namespace Operations { - class BaseBinaryOpInterface { - public: - virtual ~BaseBinaryOpInterface() = default; - virtual std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) = 0; - - - }; - - template - class BaseOp : public BaseBinaryOpInterface{ + class BaseInterface { public: + virtual ~BaseInterface() = default; virtual std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) { return Impl().operator()(l, r); }; - virtual ~BaseOp() = default; - private: - BaseOp& Impl() { return *static_cast(this); } - BaseOp() = default; - friend Implementation; + std::unique_ptr l, + std::unique_ptr r = nullptr) = 0; + }; + namespace Unary { - std::string debug_message(std::unique_ptr& l, - std::unique_ptr& r); - - std::string debug_message_2(std::unique_ptr& l, - std::unique_ptr& r); + template + class UnaryAdapter : public BaseInterface { + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r = nullptr) { + + if (r != nullptr) { + throw std::invalid_argument("Unary Operation needs one operand."); + } + return Impl().operator()(std::move(l)); + }; + + ~UnaryAdapter() = default; + private: + Implementation& Impl() { return *static_cast(this); } + friend Implementation; + + }; - namespace Addition { + class ReLU : public UnaryAdapter { - class Std : public BaseOp { public: std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + std::unique_ptr m); }; + + } - namespace OuterProduct { + namespace Binary { + template + class BaseOp : public BaseInterface { - class Naive : public BaseOp { public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + BaseOp() = default; + virtual std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r) { return Impl().operator()(std::move(l), std::move(r)); }; + virtual ~BaseOp() = default; + private: + Implementation& Impl() { return *static_cast(this); } + friend Implementation; }; - } + std::string debug_message(std::unique_ptr l, + std::unique_ptr r); + + std::string debug_message_2(std::unique_ptr l, + std::unique_ptr r); - namespace HadamardProduct { - - class Naive : public BaseOp { + namespace Addition { - public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; - }; + class Std : public BaseOp { + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); + }; - class Std : public BaseOp { + } - public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + namespace OuterProduct { - }; - } + class Naive : public BaseOp { + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); + }; - /* - Matrix Multiplication Usage: + } - std::unique_ptr ma = std::make_unique(2000, 100); - std::unique_ptr mb = std::make_unique(100, 3000); - Matrix::Operations::Multiplication::Naive mul; - std::unique_ptr mc = mul(ma, mb); - */ - namespace Multiplication { - class Naive : public BaseOp { - public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + namespace HadamardProduct { - }; + + class Naive : public BaseOp { + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); - class Square : public BaseOp { + }; - public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; - }; + class Std : public BaseOp { + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); - class ParallelDNC : public BaseOp { + }; - public: - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; - }; + } - void add_matmul_rec(std::vector::iterator c, std::vector::iterator a, std::vector::iterator b, - int m, int n, int p, int fdA, int fdB, int fdC); - - } - } + /* + Matrix Multiplication Usage: + + std::unique_ptr ma = std::make_unique(2000, 100); + std::unique_ptr mb = std::make_unique(100, 3000); + + Matrix::Operations::Multiplication::Naive mul; + + std::unique_ptr mc = mul(ma, mb); + */ + namespace Multiplication { + + + class Naive : public BaseOp { + + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); + + }; + + + class Square : public BaseOp { + + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r) ; + + }; + + + class ParallelDNC : public BaseOp { + + public: + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r); + + }; + + + void add_matmul_rec(std::vector::iterator c, std::vector::iterator a, std::vector::iterator b, + int m, int n, int p, int fdA, int fdB, int fdC); + + + } // namespace Multiplication + + } // namespace Binary + + } // namespace Operations -} +} // namespace Matrix #endif // MATRIX_ALGORITHMS_H \ No newline at end of file diff --git a/include/matrix_benchmark.h b/include/matrix_benchmark.h index 3a632f1..096537c 100644 --- a/include/matrix_benchmark.h +++ b/include/matrix_benchmark.h @@ -2,6 +2,7 @@ #define MATRIX_BENCHMARKER_H #include +#include #include "matrix.h" #include "m_algorithms.h" @@ -9,70 +10,93 @@ namespace Matrix { - template - class Benchmark { + namespace Operations { - public: - Benchmark(std::unique_ptr> _m) : matrix_operation(std::move(_m)) {} - protected: - virtual std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) = 0; - virtual ~Benchmark() = default; - std::unique_ptr> matrix_operation; - }; + template + class Benchmark { + public: + Benchmark(std::unique_ptr _m) : matrix_operation(std::move(_m)) {} + protected: + Implementation* Impl() { return static_cast(this);} + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r = nullptr) { + std::cout << "Entered Benchmark Wrapper" << std::endl; + return Impl()->operator()(l, r); + }; + ~Benchmark() = default; + std::unique_ptr matrix_operation; - /* - DESCRIPTION: + }; - Decorator for BaseOp() class Function objects, used to benchmark algorithm performance. - USAGE: - - using matrix_t = Matrix::Representation; - std::unique_ptr ma = std::make_unique(5000, 5000); - std::unique_ptr mb = std::make_unique(5000, 5000); - Matrix::Generation::Normal<0, 1> normal_distribution_init; - ma = normal_distribution_init(std::move(ma)); - mb = normal_distribution_init(std::move(mb)); + /* + DESCRIPTION: - std::unique_ptr mul_ptr_r = std::make_unique(); - Matrix::Timer mul_bm_r(std::move(mul_ptr_r)); - std::unique_ptr mf = mul_bm_r(ma, mb); - - */ - template - class Timer : public Benchmark { + Decorator for BaseInterface() class Function objects, used to benchmark algorithm performance. - public: - Timer(std::unique_ptr> _m) : Benchmark(std::move(_m)) {} - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + USAGE: + + using matrix_t = Matrix::Representation; + std::unique_ptr ma = std::make_unique(5000, 5000); + std::unique_ptr mb = std::make_unique(5000, 5000); + Matrix::Generation::Normal<0, 1> normal_distribution_init; - }; + ma = normal_distribution_init(std::move(ma)); + mb = normal_distribution_init(std::move(mb)); + std::unique_ptr mul_ptr_r = std::make_unique(); + Matrix::Timer mul_bm_r(std::move(mul_ptr_r)); + std::unique_ptr mf = mul_bm_r(ma, mb); + + */ + class Timer : public Benchmark { -// #ifdef CILKSCALE - template - class ParallelMeasurer : public Benchmark { + public: + // Timer() : Benchmark(std::move( + // std::make_unique())) {} + Timer(std::unique_ptr _m) : + Benchmark(std::move(_m)) {} + + std::unique_ptr operator()( + std::unique_ptr l, + std::unique_ptr r = nullptr); - public: - ParallelMeasurer(std::unique_ptr> _m) : Benchmark(std::move(_m)) {} - std::unique_ptr operator()( - std::unique_ptr& l, - std::unique_ptr& r) override; + int get_computation_duration_ms() { + return std::chrono::duration_cast>(end - start).count(); } - }; -// #endif + std::chrono::steady_clock::time_point get_start() { return start; } + std::chrono::steady_clock::time_point get_end() { return end; } + private: + std::chrono::steady_clock::time_point start; + std::chrono::steady_clock::time_point end; + }; + + + // #ifdef CILKSCALE + // class ParallelMeasurer : public Benchmark { + + // public: + // ParallelMeasurer(std::unique_ptr _m) : + // Benchmark(std::move(_m)) {} + // std::unique_ptr operator()( + // std::unique_ptr l, + // std::unique_ptr r); + + // }; + // #endif + + + } + + + } -} -#include "t_matrix_benchmark.cpp" #endif //MATRIX_BENCHMARKER_H \ No newline at end of file diff --git a/include/t_matrix_benchmark.cpp b/include/t_matrix_benchmark.cpp deleted file mode 100644 index 85c26b7..0000000 --- a/include/t_matrix_benchmark.cpp +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef TEMPLATED_MATRIX_BENCHMARKER_IMPLEMENTATION -#define TEMPLATED_MATRIX_BENCHMARKER_IMPLEMENTATION - -#include -#include - -#include -#include - -#include "matrix_benchmark.h" - - -namespace Matrix { - - - template - std::unique_ptr Timer::operator()(std::unique_ptr& l, - std::unique_ptr& r) { - - - struct timespec start, end; - - clock_gettime(CLOCK_MONOTONIC, &start); - std::unique_ptr mc = this->matrix_operation->operator()(l, r); - clock_gettime(CLOCK_MONOTONIC, &end); - - double tdiff = (end.tv_sec - start.tv_sec) + 1e-9*(end.tv_nsec - start.tv_nsec); - - std::cout << "Performed in: " << tdiff << " Seconds." << std::endl; - - return mc; - } - -// #ifdef CILKSCALE - /* - Cilkscale's command-line output includes work and span measurements for the Cilk program in terms of empirically measured times. - Parallelism measurements are derived from these times. - A simple struct wsp_t contains the number of nanoseconds for work and span. This data is collected immediately before and after - the wrapped function's execution. Then these two measurements are subtracted and dumped to stdout in CSV format, with the first - column being the label of the measurement. At the end, the same measurements are output for the program as a whole with an - empty label. The final measurement includes all the setup and teardown code, which pollutes the measurement we are interested in. - Because the dump to stdout can interleave with other program output, you might want to set the environment variable - CILKSCALE_OUT="filename.csv" to redirect Cilkscale output to a specific file (you will only be able to access that file when - running Cilkscale instrumented programs locally --- awsrun.py currently doesn't return output files). - - In addition to a span column, you are also seeing a "burdened span" column. Burdened span accounts for the worst possible - migration overhead, which can come from work-stealing and other factors. - */ - template - std::unique_ptr ParallelMeasurer::operator()(std::unique_ptr& l, - std::unique_ptr& r) { - - wsp_t start_wsp, stop_wsp; - - start_wsp = wsp_getworkspan(); - std::unique_ptr mc = this->matrix_operation->operator()(l, r); - stop_wsp = wsp_getworkspan(); - - wsp_dump(wsp_sub(stop_wsp, start_wsp), "Cilkscale Parallel Measurement:"); - - return mc; - } -// #endif - -} - -#endif // TEMPLATED_MATRIX_BENCHMARKER_IMPLEMENTATION \ No newline at end of file diff --git a/m_algorithms.cpp b/m_algorithms.cpp index 7161cf3..9b83f08 100644 --- a/m_algorithms.cpp +++ b/m_algorithms.cpp @@ -9,266 +9,292 @@ namespace Matrix { namespace Operations { + namespace Unary { - std::string debug_message(std::unique_ptr&l, - std::unique_ptr&r) { + + std::unique_ptr ReLU::operator()( + std::unique_ptr m){ - std::string error_msg = "Matrix A Columns not equal to Matrix B Rows: [" + - std::to_string(l->num_rows()) + "," + - std::to_string(l->num_cols()) + "] X [" + - std::to_string(r->num_rows()) + "," + - std::to_string(r->num_cols()) + "]"; + std::unique_ptr output = std::make_unique( + Matrix::Rows(m->num_rows()), + Matrix::Columns(m->num_cols()) + ); + std::replace_copy_if(m->scanStart(), m->scanEnd(), output->scanStart(), + [](float z){ return z < 0;}, 0); - return error_msg; + return output; } + } - std::string debug_message_2(std::unique_ptr&l, - std::unique_ptr&r) { - - std::string error_msg = "Matrix A size not equal to Matrix B: [" + - std::to_string(l->num_rows()) + "," + - std::to_string(l->num_cols()) + "] X [" + - std::to_string(r->num_rows()) + "," + - std::to_string(r->num_cols()) + "]"; - return error_msg; - } + namespace Binary { - namespace Addition { + std::string debug_message(std::unique_ptrl, + std::unique_ptrr) { - std::unique_ptr Std::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + std::string error_msg = "Matrix A Columns not equal to Matrix B Rows: [" + + std::to_string(l->num_rows()) + "," + + std::to_string(l->num_cols()) + "] X [" + + std::to_string(r->num_rows()) + "," + + std::to_string(r->num_cols()) + "]"; - if ((l->num_rows() != r->num_rows()) && (l->num_cols() != r->num_cols())) { - throw std::length_error(debug_message_2(l, r)); + + return error_msg; } - - auto output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - std::transform(l->scanStart(), l->scanEnd(), r->scanStart(), output->scanStart(), std::plus()); - return output; - } - } + std::string debug_message_2(std::unique_ptrl, + std::unique_ptrr) { + + std::string error_msg = "Matrix A size not equal to Matrix B: [" + + std::to_string(l->num_rows()) + "," + + std::to_string(l->num_cols()) + "] X [" + + std::to_string(r->num_rows()) + "," + + std::to_string(r->num_cols()) + "]"; - namespace OuterProduct { + return error_msg; + } - std::unique_ptr Naive::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + namespace Addition { - if (l->num_rows() != r->num_rows() && l->num_cols() != r->num_cols()) { - throw std::length_error(debug_message_2(l, r)); - } - if (l->num_rows() != 1 && l->num_cols() != 1) { - throw std::length_error("Operands are not Vectors."); - } - - u_int64_t dimension; + std::unique_ptr Std::operator()( + std::unique_ptr l, + std::unique_ptr r) { + + if ((l->num_rows() != r->num_rows()) && (l->num_cols() != r->num_cols())) { + throw std::length_error(debug_message_2(std::move(l), std::move(r))); + } + + auto output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + + std::transform(l->scanStart(), l->scanEnd(), r->scanStart(), output->scanStart(), std::plus()); - if (l->num_rows() > l->num_cols()) { - dimension = l->num_rows(); + return output; } - else dimension = l->num_cols(); + } + + + namespace OuterProduct { - auto output = std::make_unique(Rows(dimension), Columns(dimension)); - auto li = l->scanStart(); + std::unique_ptr Naive::operator()( + std::unique_ptr l, + std::unique_ptr r) { - for (int i = 0; li != l->scanEnd(); li++, i++) { - auto ri = r->scanStart(); + if (l->num_rows() != r->num_rows() && l->num_cols() != r->num_cols()) { + throw std::length_error(debug_message_2(std::move(l), std::move(r))); + } + if (l->num_rows() != 1 && l->num_cols() != 1) { + throw std::length_error("Operands are not Vectors."); + } - for (int j = 0; ri != r->scanEnd(); ri++, j++) { - float val = *li * *ri; - output->put(i, j, val); + u_int64_t dimension; + + if (l->num_rows() > l->num_cols()) { + dimension = l->num_rows(); } + else dimension = l->num_cols(); + + auto output = std::make_unique(Rows(dimension), Columns(dimension)); + + auto li = l->scanStart(); + + for (int i = 0; li != l->scanEnd(); li++, i++) { + auto ri = r->scanStart(); + + for (int j = 0; ri != r->scanEnd(); ri++, j++) { + float val = *li * *ri; + output->put(i, j, val); + } + } + + return output; } - - return output; + } - } + namespace HadamardProduct { - namespace HadamardProduct { + std::unique_ptr Std::operator()( + std::unique_ptr l, + std::unique_ptr r) { - std::unique_ptr Std::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + auto output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - auto output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + + std::transform(l->scanStart(), l->scanEnd(), r->scanStart(), output->scanStart(), std::multiplies()); + + return output; + } - - std::transform(l->scanStart(), l->scanEnd(), r->scanStart(), output->scanStart(), std::multiplies()); - - return output; - } + std::unique_ptr Naive::operator()( + std::unique_ptr l, + std::unique_ptr r) { - std::unique_ptr Naive::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + if ((l->num_rows() != r->num_rows()) && (l->num_cols() != r->num_cols())) { + throw std::length_error("Matrix A not same size as Matrix B."); + } - if ((l->num_rows() != r->num_rows()) && (l->num_cols() != r->num_cols())) { - throw std::length_error("Matrix A not same size as Matrix B."); - } + std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + for (u_int64_t i = 0; i < l->num_rows(); i++) { + + for (u_int64_t j = 0; j < r->num_cols(); j++) { - for (u_int64_t i = 0; i < l->num_rows(); i++) { - - for (u_int64_t j = 0; j < r->num_cols(); j++) { + float val = l->get(i, j) * r->get(i, j); - float val = l->get(i, j) * r->get(i, j); + output->put(i, j, val); - output->put(i, j, val); + } } + + return output; } + } - return output; - } - } + namespace Multiplication { + std::unique_ptr Naive::operator()( + std::unique_ptr l, + std::unique_ptr r) { - namespace Multiplication { + if (l->num_cols() != r->num_rows()) { + throw std::length_error(debug_message(std::move(l), std::move(r))); - std::unique_ptr Naive::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + } - if (l->num_cols() != r->num_rows()) { - throw std::length_error(debug_message(l, r)); + std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - } - std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + for (u_int64_t i = 0; i < l->num_rows(); i++) { + + for (u_int64_t j = 0; j < r->num_cols(); j++) { - for (u_int64_t i = 0; i < l->num_rows(); i++) { - - for (u_int64_t j = 0; j < r->num_cols(); j++) { + float val = 0; + for (u_int64_t k = 0; k < l->num_cols(); k++) { + val += l->get(i, k) * r->get(k, j); + } - float val = 0; + output->put(i, j, val); - for (u_int64_t k = 0; k < l->num_cols(); k++) { - val += l->get(i, k) * r->get(k, j); } - output->put(i, j, val); - } - } - - return output; - } + return output; + } - - /* - Adapted from https://ocw.mit.edu/courses/mathematics/18-335j-introduction-to-numerical-methods-spring-2019/week-5/MIT18_335JS19_lec12.pdf - */ - void add_matmul_rec(std::vector::iterator a, std::vector::iterator b, std::vector::iterator c, - int m, int n, int p, int fdA, int fdB, int fdC) { - - if (m + n + p <= 48) { - int i, j, k; + + /* + Adapted from https://ocw.mit.edu/courses/mathematics/18-335j-introduction-to-numerical-methods-spring-2019/week-5/MIT18_335JS19_lec12.pdf + */ + void add_matmul_rec(std::vector::iterator a, std::vector::iterator b, std::vector::iterator c, + int m, int n, int p, int fdA, int fdB, int fdC) { - for (i = 0; i < m; ++i) { - for (k = 0; k < p; ++k) { - float sum = 0; - for (j = 0; j < n; ++j) - sum += *(a + (i * fdA + j)) * *(b + (j * fdB + k)); - *(c + (i * fdC + k)) += sum; - + if (m + n + p <= 48) { + int i, j, k; + + for (i = 0; i < m; ++i) { + for (k = 0; k < p; ++k) { + float sum = 0; + for (j = 0; j < n; ++j) + sum += *(a + (i * fdA + j)) * *(b + (j * fdB + k)); + *(c + (i * fdC + k)) += sum; + + } } } - } - else { - int m2 = m/2, n2 = n/2, p2 = p/2; + else { + int m2 = m/2, n2 = n/2, p2 = p/2; + + cilk_spawn add_matmul_rec(a, b, c, m2, n2, p2, fdA, fdB, fdC); + cilk_spawn add_matmul_rec(a, b + p2, c + p2, m2, n2, p - p2, fdA, fdB, fdC); + cilk_spawn add_matmul_rec(a + m2*fdA + n2, b + n2*fdB, c + m2*fdC, m-m2, n - n2, p2, fdA, fdB, fdC); + add_matmul_rec(a + m2*fdA + n2, b + p2 + n2*fdB, c + m2*fdC + p2, m - m2, n - n2, p - p2, fdA, fdB, fdC); + cilk_sync; - cilk_spawn add_matmul_rec(a, b, c, m2, n2, p2, fdA, fdB, fdC); - cilk_spawn add_matmul_rec(a, b + p2, c + p2, m2, n2, p - p2, fdA, fdB, fdC); - cilk_spawn add_matmul_rec(a + m2*fdA + n2, b + n2*fdB, c + m2*fdC, m-m2, n - n2, p2, fdA, fdB, fdC); - add_matmul_rec(a + m2*fdA + n2, b + p2 + n2*fdB, c + m2*fdC + p2, m - m2, n - n2, p - p2, fdA, fdB, fdC); - cilk_sync; - - cilk_spawn add_matmul_rec(a + n2, b + n2*fdB, c, m2, n - n2, p2, fdA, fdB, fdC); - cilk_spawn add_matmul_rec(a + m2*fdA, b, c + m2*fdC, m - m2, n2, p2, fdA, fdB, fdC); - cilk_spawn add_matmul_rec(a + n2 , b + p2 + n2*fdB, c + p2, m2, n - n2, p - p2, fdA, fdB, fdC); - add_matmul_rec(a + m2*fdA, b + p2, c + m2*fdC + p2, m - m2, n2, p - p2, fdA, fdB, fdC); - cilk_sync; + cilk_spawn add_matmul_rec(a + n2, b + n2*fdB, c, m2, n - n2, p2, fdA, fdB, fdC); + cilk_spawn add_matmul_rec(a + m2*fdA, b, c + m2*fdC, m - m2, n2, p2, fdA, fdB, fdC); + cilk_spawn add_matmul_rec(a + n2 , b + p2 + n2*fdB, c + p2, m2, n - n2, p - p2, fdA, fdB, fdC); + add_matmul_rec(a + m2*fdA, b + p2, c + m2*fdC + p2, m - m2, n2, p - p2, fdA, fdB, fdC); + cilk_sync; + } } - } - std::unique_ptr ParallelDNC::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + std::unique_ptr ParallelDNC::operator()( + std::unique_ptr l, + std::unique_ptr r) { - if (l->num_cols() != r->num_rows()) { - - throw std::length_error(debug_message(l, r)); - } + if (l->num_cols() != r->num_rows()) { + + throw std::length_error(debug_message(std::move(l), std::move(r))); + } - std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - add_matmul_rec(l->scanStart(), r->scanStart(), output->scanStart(), l->num_rows(), l->num_cols(), r->num_cols(), l->num_cols(), r->num_cols(), r->num_cols()); + add_matmul_rec(l->scanStart(), r->scanStart(), output->scanStart(), l->num_rows(), l->num_cols(), r->num_cols(), l->num_cols(), r->num_cols(), r->num_cols()); - return output; - } - - - std::unique_ptr Square::operator()( - std::unique_ptr& l, - std::unique_ptr& r) { + return output; + } + + + std::unique_ptr Square::operator()( + std::unique_ptr l, + std::unique_ptr r) { - if (l->num_cols() != r->num_rows()) { - throw std::length_error(debug_message(l, r)); + if (l->num_cols() != r->num_rows()) { + throw std::length_error(debug_message(std::move(l), std::move(r))); - } + } - std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); + std::unique_ptr output = std::make_unique(Rows(l->num_rows()), Columns(r->num_cols())); - cilk_for (u_int64_t i = 0; i < l->num_rows(); i++) { - - for (u_int64_t j = 0; j < r->num_cols(); j++) { + cilk_for (u_int64_t i = 0; i < l->num_rows(); i++) { + + for (u_int64_t j = 0; j < r->num_cols(); j++) { - float val = 0; + float val = 0; - for (u_int64_t k = 0; k < l->num_cols(); k++) { - val += l->get(i, k) * r->get(k, j); - } + for (u_int64_t k = 0; k < l->num_cols(); k++) { + val += l->get(i, k) * r->get(k, j); + } + + output->put(i, j, val); - output->put(i, j, val); + } } - } + return output; + } + + } // namespace Multiplication - return output; - } - } + } // namespace Binary - } + } // namespace Operations -} +} // namespace Matrix diff --git a/matrix_benchmark.cpp b/matrix_benchmark.cpp index a4a14f3..c9c7f5d 100644 --- a/matrix_benchmark.cpp +++ b/matrix_benchmark.cpp @@ -1,3 +1,57 @@ +#include +#include +#include + +#include +#include #include "matrix_benchmark.h" + +namespace Matrix { + + + std::unique_ptr Operations::Timer::operator()(std::unique_ptr l, + std::unique_ptr r) { + + std::cout << "Entered Timer wrapper" << std::endl; + + start = std::chrono::steady_clock::now(); + std::unique_ptr mc = this->matrix_operation->operator()(std::move(l), std::move(r)); + end = std::chrono::steady_clock::now(); + + + return mc; + } + +// #ifdef CILKSCALE + /* + Cilkscale's command-line output includes work and span measurements for the Cilk program in terms of empirically measured times. + Parallelism measurements are derived from these times. + A simple struct wsp_t contains the number of nanoseconds for work and span. This data is collected immediately before and after + the wrapped function's execution. Then these two measurements are subtracted and dumped to stdout in CSV format, with the first + column being the label of the measurement. At the end, the same measurements are output for the program as a whole with an + empty label. The final measurement includes all the setup and teardown code, which pollutes the measurement we are interested in. + Because the dump to stdout can interleave with other program output, you might want to set the environment variable + CILKSCALE_OUT="filename.csv" to redirect Cilkscale output to a specific file (you will only be able to access that file when + running Cilkscale instrumented programs locally --- awsrun.py currently doesn't return output files). + + In addition to a span column, you are also seeing a "burdened span" column. Burdened span accounts for the worst possible + migration overhead, which can come from work-stealing and other factors. + */ + // std::unique_ptr Operations::ParallelMeasurer::operator()(std::unique_ptr l, + // std::unique_ptr r) { + + // wsp_t start_wsp, stop_wsp; + + // start_wsp = wsp_getworkspan(); + // std::unique_ptr mc = this->matrix_operation->operator()(l, r); + // stop_wsp = wsp_getworkspan(); + + // wsp_dump(wsp_sub(stop_wsp, start_wsp), "Cilkscale Parallel Measurement:"); + + // return mc; + // } +// #endif + +} diff --git a/unittests/test_matrix_addition.cpp b/unittests/test_matrix_addition.cpp index ec3ce70..aa7196c 100644 --- a/unittests/test_matrix_addition.cpp +++ b/unittests/test_matrix_addition.cpp @@ -20,9 +20,9 @@ TEST_CASE("Matrix Addition", "[arithmetic]") test_output = init_as_two(std::move(test_output)); - Matrix::Operations::Addition::Std naive_add; + Matrix::Operations::Binary::Addition::Std naive_add; - std::unique_ptr sum = naive_add(matrix_with_ones, matrix_with_ones); + std::unique_ptr sum = naive_add(std::move(matrix_with_ones), std::move(matrix_with_ones)); SECTION("Cilk-for Multiplication") diff --git a/unittests/test_matrix_multiplication.cpp b/unittests/test_matrix_multiplication.cpp index 120ccae..2ea5923 100644 --- a/unittests/test_matrix_multiplication.cpp +++ b/unittests/test_matrix_multiplication.cpp @@ -17,13 +17,13 @@ TEST_CASE("Matrix Multiplication", "[arithmetic]") mb = normal_distribution_init(std::move(mb)); - Matrix::Operations::Multiplication::Naive naive_mul; - Matrix::Operations::Multiplication::Square c_mul; - Matrix::Operations::Multiplication::ParallelDNC r_mul; + Matrix::Operations::Binary::Multiplication::Naive naive_mul; + Matrix::Operations::Binary::Multiplication::Square c_mul; + Matrix::Operations::Binary::Multiplication::ParallelDNC r_mul; - std::unique_ptr mc = naive_mul(ma, mb); - std::unique_ptr md = c_mul(ma, mb); - std::unique_ptr me = r_mul(ma, mb); + std::unique_ptr mc = naive_mul(std::move(ma), std::move(mb)); + std::unique_ptr md = c_mul(std::move(ma), std::move(mb)); + std::unique_ptr me = r_mul(std::move(ma), std::move(mb));