diff --git a/include/m_algorithms.h b/include/m_algorithms.h index 13c190d..a5a01ff 100644 --- a/include/m_algorithms.h +++ b/include/m_algorithms.h @@ -34,7 +34,6 @@ namespace Matrix { public: Matrix::Representation operator()( const Matrix::Representation& l) const noexcept { - return Impl().operate(l); }; @@ -58,7 +57,7 @@ namespace Matrix { static_assert(MatrixOperatable); - class SoftMax : public UnaryAdapter { + class SoftMax : public UnaryAdapter { public: Matrix::Representation operate( @@ -68,12 +67,20 @@ namespace Matrix { static_assert(MatrixOperatable); - // class Transpose : public UnaryAdapter { + class Transpose : public UnaryAdapter { + + public: + Matrix::Representation operate( + const Matrix::Representation& m) const noexcept; + }; + + static_assert(MatrixOperatable); + - // public: - // Matrix::Representation operate( - // const Matrix::Representation& m) const noexcept; - // }; + void transpose_helper( + std::vector::const_iterator in, + std::vector::iterator out, + int rb, int re, int cb, int ce, int rows, int cols) noexcept; } diff --git a/include/matrix_benchmark.h b/include/matrix_benchmark.h index 144cf29..58fc145 100644 --- a/include/matrix_benchmark.h +++ b/include/matrix_benchmark.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include "matrix.h" #include "m_algorithms.h" @@ -37,28 +39,51 @@ namespace Matrix { std::cout << std::endl << "Performed in " << mul_bm_r.get_computation_duration_ms() << " ms." << std::endl; */ - template + + template class Timer { public: - Timer(Operator _m) : + explicit Timer(T _m) : matrix_operation(_m) {} - Representation operator()( - const Representation& l, - const Representation& r); - int get_computation_duration_ms() { return std::chrono::duration_cast>(end - start).count(); } std::chrono::steady_clock::time_point get_start() { return start; } std::chrono::steady_clock::time_point get_end() { return end; } - + + + template + Representation operator()( + const Matrix::Representation& l, + const Matrix::Representation& r) noexcept { + + start = std::chrono::steady_clock::now(); + Matrix::Representation mc = matrix_operation(l, r); + end = std::chrono::steady_clock::now(); + + return Matrix::Representation{mc}; + } + + template + Representation operator()( + const Matrix::Representation& l) noexcept { + + start = std::chrono::steady_clock::now(); + Matrix::Representation mc = matrix_operation(l); + end = std::chrono::steady_clock::now(); + + return Matrix::Representation{mc}; + } + private: - Operator matrix_operation; + + T matrix_operation; std::chrono::steady_clock::time_point start; std::chrono::steady_clock::time_point end; }; + } diff --git a/include/matrix_printer.h b/include/matrix_printer.h index f26f2bc..4d7e328 100644 --- a/include/matrix_printer.h +++ b/include/matrix_printer.h @@ -9,7 +9,7 @@ namespace Matrix { class Printer { public: - std::unique_ptr operator()(std::unique_ptr m); + void operator()(const Matrix::Representation& m) noexcept; }; diff --git a/m_algorithms.cpp b/m_algorithms.cpp index 9bf1445..0f47693 100644 --- a/m_algorithms.cpp +++ b/m_algorithms.cpp @@ -50,6 +50,73 @@ namespace Matrix { return Matrix::Representation{output}; } + Matrix::Representation Transpose::operate( + const Matrix::Representation& m) const noexcept { + + Matrix::Representation output = Matrix::Representation{ + Matrix::Rows(m.num_cols()), + Matrix::Columns(m.num_rows()) + }; + + transpose_helper( + m.constScanStart(), + output.scanStart(), + 0, m.num_rows(), + 0, m.num_cols(), + m.num_rows(), m.num_cols()); + + return Matrix::Representation{output}; + } + + void transpose_helper( + std::vector::const_iterator in, + std::vector::iterator out, + int rb, int re, int cb, int ce, int rows, int cols) noexcept { + + int r = re - rb, c = ce - cb; + if (r <= 16 && c <= 16) { + for (int i = rb; i < re; i++) { + for (int j = cb; j < ce; j++) { + *(out + (j * rows + i)) = *(in + (i * cols + j)); + } + } + } else if (r >= c) { + cilk_spawn transpose_helper(in, out, rb, rb + (r / 2), cb, ce, rows, cols); + transpose_helper(in, out, rb + (r / 2), re, cb, ce, rows, cols); + cilk_sync; + } else { + cilk_spawn transpose_helper(in, out, rb, re, cb, cb + (c / 2), rows, cols); + transpose_helper(in, out, rb, re, cb + (c / 2), ce, rows, cols); + cilk_sync; + } + } + + + // int transpose( double *a, int ndra, int nr, int nc, double *b, int ndrb ) { + // if (nr < 32) { + // for (int i = 0; i < n; i++) + // for (int j = 0; j < i; j++) + // a[j * N + i] + // a[i * N + j]; + // transposeBase(a, ndra, nr, nc, b, ndrb ); + // } + // else { + // /* subdivide the long side */ + // if (nr > nc) { + // transpose(a, ndra, nr/2, nc, b, ndrb ); + // transpose(a + nr/2 ,ndra, nr-nr/2, nc, b+(nr/2)*ndrb, ndrb ); + // } + // else { + // transpose(a, ndra, nr, nc/2, b, ndrb ); + // transpose(a + ndra*(nc/2), ndra, nr, nc-nc/2, b+nc/2, ndrb ); + // } + // } + // } + + // void add_matmul_rec(std::vector::const_iterator a, std::vector::const_iterator b, std::vector::iterator c, + // int m, int n, int p, int fdA, int fdB, int fdC) noexcept { + + // } } // Unary @@ -174,6 +241,8 @@ namespace Matrix { return Matrix::Representation{output}; } + + } @@ -279,6 +348,8 @@ namespace Matrix { /* Adapted from https://ocw.mit.edu/courses/mathematics/18-335j-introduction-to-numerical-methods-spring-2019/week-5/MIT18_335JS19_lec12.pdf + + We need to divide the data until it fits into lowest cache. */ void add_matmul_rec(std::vector::const_iterator a, std::vector::const_iterator b, std::vector::iterator c, int m, int n, int p, int fdA, int fdB, int fdC) noexcept { diff --git a/matrix_benchmark.cpp b/matrix_benchmark.cpp index 1eb3f35..5153131 100644 --- a/matrix_benchmark.cpp +++ b/matrix_benchmark.cpp @@ -11,36 +11,42 @@ namespace Matrix { - template - Representation Operations::Timer::operator()(const Matrix::Representation& l, - const Matrix::Representation& r) { + // template + // Representation Operations::Timer::operator()(const Matrix::Representation& l, + // const Matrix::Representation& r) { - Matrix::Representation mc; + // start = std::chrono::steady_clock::now(); + // Matrix::Representation mc = matrix_operation(l); + // end = std::chrono::steady_clock::now(); - start = std::chrono::steady_clock::now(); - if constexpr (Matrix::Operations::UnaryMatrixOperatable) { - mc = matrix_operation(l); - } - else if constexpr (Matrix::Operations::BinaryMatrixOperatable) { - mc = matrix_operation(l, r); - } + + // return Matrix::Representation{mc}; + // } - end = std::chrono::steady_clock::now(); - - return Matrix::Representation{mc}; - } + // template + // Representation Operations::Timer::operator()(const Matrix::Representation& l, + // const Matrix::Representation& r) { + + // start = std::chrono::steady_clock::now(); + // Matrix::Representation mc = matrix_operation(l, r); + // end = std::chrono::steady_clock::now(); + + + // return Matrix::Representation{mc}; + // } - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; - template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; + // template class Operations::Timer; diff --git a/matrix_printer.cpp b/matrix_printer.cpp index 1ef33bf..6ec46fe 100644 --- a/matrix_printer.cpp +++ b/matrix_printer.cpp @@ -9,20 +9,16 @@ #define MAX_PRINT_LENGTH 5 -std::unique_ptr Matrix::Printer::operator()(std::unique_ptr m) { +void Matrix::Printer::operator()(const Matrix::Representation& m) noexcept { - if (m == nullptr) { - throw std::invalid_argument("Matrix has no data (pointing to null)."); - } - bool valid_column_print = true; bool reached_end_row = true; bool before_max_width = true; bool last_column_val = true; - u_int64_t n_cols = m->num_cols(); - u_int64_t n_rows = m->num_rows(); + u_int64_t n_cols = m.num_cols(); + u_int64_t n_rows = m.num_rows(); uint64_t total_iter = n_rows * n_cols; std::cout << "Matrix[R=" << n_rows << "][C=" << n_cols << "]:"; @@ -41,7 +37,7 @@ std::unique_ptr Matrix::Printer::operator()(std::unique_ if (before_max_width || last_column_val) { - std::cout << m->get(i / n_cols, i % n_cols) << " "; + std::cout << m.get(i / n_cols, i % n_cols) << " "; valid_column_print = true; } else if (!valid_column_print) {} @@ -54,5 +50,4 @@ std::unique_ptr Matrix::Printer::operator()(std::unique_ std::cout << std::endl << std::endl; - return m; } \ No newline at end of file