From 74c90bf1157e335de1e608d1c40de5123887a42d Mon Sep 17 00:00:00 2001 From: Colin Unger Date: Sat, 16 Mar 2024 10:13:36 -0700 Subject: [PATCH] Remove unnecessary dependencies and allow using external installs (#1321) * Remove unnecessary dependencies and allow using external installs * Cleanup before PR and format * Add FindNCCL from torch, remove unnecessary cmake files * Cleanup nccl cmake * Fix remaining optional bugs in kernels * Try nix-based CI * Fix nix-develop version * Fix flake devshell name * Move ci build code to script * Satisfy shellcheck * Try removing ccache * Attempt to skip ccache install * Move to using in-flake caches * Format --- .flake/pkgs/legion.nix | 53 ++++ .flake/pkgs/tokenizers-cpp.nix | 43 ++++ .github/workflows/helpers/build_cuda.sh | 29 +++ .github/workflows/per-lib-check.yml | 45 ++-- .gitmodules | 18 -- CMakeLists.txt | 14 +- cmake/Modules/FindNCCL.cmake | 175 +++++++++++++ cmake/any.cmake | 16 -- cmake/fmt.cmake | 8 +- cmake/invoke.cmake | 5 - cmake/json.cmake | 12 +- cmake/nccl.cmake | 121 ++------- cmake/optional.cmake | 4 - cmake/spdlog.cmake | 8 +- cmake/variant.cmake | 5 - config/config.linux | 2 +- deps/any | 1 - deps/googletest | 1 - deps/invoke | 1 - deps/optional | 1 - deps/pybind11 | 1 - deps/variant | 1 - flake.lock | 60 +++++ flake.nix | 99 ++++++++ lib/kernels/CMakeLists.txt | 12 +- lib/kernels/include/kernels/array_shape.h | 7 +- lib/kernels/include/kernels/device.h | 1 + lib/kernels/include/kernels/perf_metrics.h | 30 +-- lib/kernels/src/cuda/batch_norm_kernels.cu | 30 +-- lib/kernels/src/cuda/cast_kernels.cu | 6 +- lib/kernels/src/device.h | 1 + lib/kernels/src/perf_metrics.cc | 16 +- lib/op-attrs/include/op-attrs/datatype.h | 14 +- lib/op-attrs/include/op-attrs/get_op_type.h | 2 +- .../include/op-attrs/get_output_shapes.h | 8 +- .../include/op-attrs/operator_attrs.h | 54 ++-- lib/op-attrs/include/op-attrs/ops/conv_2d.h | 4 +- lib/op-attrs/include/op-attrs/ops/linear.h | 12 +- .../include/op-attrs/ops/loss_functions.h | 4 +- lib/op-attrs/src/operator_attrs.cc | 4 +- .../src/parallel_dim_mapping_record.cc | 4 +- .../src/parallel_dim_mapping_record.h | 7 +- .../src/parallel_dim_mapping_record_solver.cc | 8 +- .../src/parallel_dim_mapping_record_solver.h | 8 +- .../include/pcg/computation_graph_builder.h | 237 ++++++++++-------- lib/pcg/include/pcg/device_id.h | 3 +- .../include/pcg/file_format/v1/data_type.h | 4 +- lib/pcg/include/pcg/file_format/v1/graphs.h | 2 +- .../include/pcg/file_format/v1/initializer.h | 18 +- .../pcg/file_format/v1/operator_attrs.h | 6 +- .../pcg/file_format/v1/parallel_tensor.h | 10 +- lib/pcg/include/pcg/file_format/v1/tensor.h | 10 +- lib/pcg/include/pcg/initializer.h | 18 +- lib/pcg/include/pcg/layer.h | 7 +- lib/pcg/include/pcg/operator.h | 3 +- lib/pcg/include/pcg/parallel_tensor.h | 12 +- lib/pcg/include/pcg/tensor.h | 6 +- lib/pcg/src/computation_graph_builder.cc | 136 +++++----- lib/pcg/src/device_id.cc | 5 +- lib/pcg/src/layer.cc | 2 +- lib/pcg/src/operator.cc | 2 +- lib/pcg/src/parallel_tensor.cc | 4 +- lib/runtime/src/serialization.h | 10 +- lib/substitutions/src/substitution.cc | 2 +- lib/utils/CMakeLists.txt | 4 - lib/utils/include/utils/containers.decl.h | 23 +- lib/utils/include/utils/containers.h | 26 +- lib/utils/include/utils/disjoint_set.h | 33 +-- lib/utils/include/utils/dot_file.h | 13 +- lib/utils/include/utils/fmt.h | 18 +- lib/utils/include/utils/graph/algorithms.h | 30 +-- .../graph/labelled/output_labelled_open.h | 4 +- lib/utils/include/utils/graph/open_edge.h | 6 +- 
lib/utils/include/utils/graph/query_set.h | 8 +- .../include/utils/graph/serialparallel.h | 10 +- lib/utils/include/utils/graph/traversal.h | 4 +- lib/utils/include/utils/graph/views.h | 5 +- lib/utils/include/utils/invoke.h | 12 - lib/utils/include/utils/json.h | 27 +- lib/utils/include/utils/optional.decl | 8 +- lib/utils/include/utils/optional.h | 8 +- lib/utils/include/utils/sequence.h | 4 +- lib/utils/include/utils/stack_map.h | 9 +- lib/utils/include/utils/stack_vector.h | 11 +- lib/utils/include/utils/tuple.h | 11 +- lib/utils/include/utils/type_traits.h | 1 - lib/utils/include/utils/variant.h | 125 +++++---- lib/utils/include/utils/visitable.h | 6 +- lib/utils/src/graph/algorithms.cc | 25 +- lib/utils/src/graph/serialparallel.cc | 24 +- lib/utils/src/graph/serialparallel_internal.h | 6 +- lib/utils/src/graph/traversal.cc | 4 +- lib/utils/src/stack_vector.cc | 1 + lib/utils/src/tuple.cc | 1 + 94 files changed, 1130 insertions(+), 789 deletions(-) create mode 100644 .flake/pkgs/legion.nix create mode 100644 .flake/pkgs/tokenizers-cpp.nix create mode 100755 .github/workflows/helpers/build_cuda.sh create mode 100644 cmake/Modules/FindNCCL.cmake delete mode 100644 cmake/any.cmake delete mode 100644 cmake/invoke.cmake delete mode 100644 cmake/optional.cmake delete mode 100644 cmake/variant.cmake delete mode 160000 deps/any delete mode 160000 deps/googletest delete mode 160000 deps/invoke delete mode 160000 deps/optional delete mode 160000 deps/pybind11 delete mode 160000 deps/variant create mode 100644 flake.lock create mode 100644 flake.nix delete mode 100644 lib/utils/include/utils/invoke.h create mode 100644 lib/utils/src/stack_vector.cc create mode 100644 lib/utils/src/tuple.cc diff --git a/.flake/pkgs/legion.nix b/.flake/pkgs/legion.nix new file mode 100644 index 0000000000..814ef85e00 --- /dev/null +++ b/.flake/pkgs/legion.nix @@ -0,0 +1,53 @@ +{ lib +, stdenv +, fetchFromGitLab +, cmake +, python3 +, cudaPackages ? { } +, cudaCapabilities ? [ "60" "70" "80" "86" ] +, maxDim ? 
5 +}: + +# from https://codeberg.org/Uli/nix-things/src/commit/776519e382c81b136c1d0b10d8c7b52b4acb9192/overlays/cq/python/libclang-python.nix + +let + cmakeFlag = x: if x then "1" else "0"; + + inherit (cudaPackages) cudatoolkit; +in + +stdenv.mkDerivation rec { + pname = "legion_flexflow"; + version = "2024-03-13"; + + src = fetchFromGitLab { + owner = "StanfordLegion"; + repo = "legion"; + rev = "24e8c452341dea41427e0ce61e154d61715e6835"; + sha256 = "sha256-NjCSjphOIew/V24i74I6DModSGcWKLeiSIjts3cFtx4="; + fetchSubmodules = true; + }; + + nativeBuildInputs = [ + cmake + ]; + + cmakeFlags = [ + "-DLegion_USE_Python=1" + "-DLegion_BUILD_BINDINGS=1" + "-DLegion_USE_CUDA=1" + "-DLegion_CUDA_ARCH=${lib.concatStringsSep "," cudaCapabilities}" + "-DLegion_MAX_DIM=${toString maxDim}" + ]; + + buildInputs = [ + python3 + cudatoolkit + ]; + + meta = with lib; { + description = "Legion is a parallel programming model for distributed, heterogeneous machines"; + homepage = "https://github.com/StanfordLegion/legion"; + license = licenses.asl20; + }; +} diff --git a/.flake/pkgs/tokenizers-cpp.nix b/.flake/pkgs/tokenizers-cpp.nix new file mode 100644 index 0000000000..a705667ae6 --- /dev/null +++ b/.flake/pkgs/tokenizers-cpp.nix @@ -0,0 +1,43 @@ +{ lib +, stdenv +, fetchFromGitHub +, cmake +, rustc +, cargo +}: + +stdenv.mkDerivation rec { + pname = "tokenizers-cpp"; + version = "2024-03-13"; + + src = fetchFromGitHub { + owner = "mlc-ai"; + repo = "tokenizers-cpp"; + rev = "4f42c9fa74946d70af86671a3804b6f2433e5dac"; + sha256 = "sha256-p7OYx9RVnKUAuMexy3WjW2zyfMJ/Q9ss4xFLsbQK7wA="; + fetchSubmodules = true; + }; + + nativeBuildInputs = [ + cmake + rustc + ]; + + # cmakeFlags = [ + # "-DLegion_USE_Python=1" + # "-DLegion_BUILD_BINDINGS=1" + # "-DLegion_USE_CUDA=1" + # "-DLegion_CUDA_ARCH=${lib.concatStringsSep "," cudaCapabilities}" + # ]; + + buildInputs = [ ]; + # python3 + # cudatoolkit + # ]; + + meta = with lib; { + description = "Universal cross-platform tokenizers binding to HF and sentencepiece"; + homepage = "https://github.com/mlc-ai/tokenizers-cpp"; + license = licenses.asl20; + }; +} diff --git a/.github/workflows/helpers/build_cuda.sh b/.github/workflows/helpers/build_cuda.sh new file mode 100755 index 0000000000..3524f885a7 --- /dev/null +++ b/.github/workflows/helpers/build_cuda.sh @@ -0,0 +1,29 @@ +#! 
/usr/bin/env bash + +set -euo pipefail +set -x + +DIR="$(realpath -- "$(dirname "${BASH_SOURCE[0]}")")" +REPO="$(realpath -- "$DIR/../../../")" + +export FF_GPU_BACKEND="cuda" +export FF_CUDA_ARCH=70 +cd "$REPO" +mkdir build +cd build +#if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then +# export FF_BUILD_ALL_EXAMPLES=ON +# export FF_BUILD_UNIT_TESTS=ON +#fi +../config/config.linux \ + -DCMAKE_CXX_COMPILER="clang++" \ + -DCMAKE_C_COMPILER="clang" \ + -DCMAKE_C_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CXX_COMPILER_LAUNCHER=ccache \ + -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \ + -DFF_USE_EXTERNAL_LEGION=ON \ + -DFF_USE_EXTERNAL_JSON=ON \ + -DFF_USE_EXTERNAL_FMT=ON \ + -DFF_USE_EXTERNAL_SPDLOG=ON + +# vim: set tabstop=2 shiftwidth=2 expandtab: diff --git a/.github/workflows/per-lib-check.yml b/.github/workflows/per-lib-check.yml index f21621b265..fa8252bc20 100644 --- a/.github/workflows/per-lib-check.yml +++ b/.github/workflows/per-lib-check.yml @@ -8,9 +8,7 @@ jobs: cmake-build: name: Library CMake Build runs-on: ubuntu-20.04 - defaults: - run: - shell: bash -l {0} # required to use an activated conda environment + strategy: max-parallel: 1 matrix: @@ -22,23 +20,27 @@ jobs: with: submodules: recursive - - name: Free additional space on runner - run: .github/workflows/helpers/free_space_on_runner.sh + - name: Install nix + uses: cachix/install-nix-action@v25 + with: + github_access_token: '${{ secrets.GITHUB_TOKEN }}' - - name: Install CUDA - uses: Jimver/cuda-toolkit@v0.2.11 - id: cuda-toolkit + - uses: cachix/cachix-action@v14 with: - cuda: "12.1.0" - # Disable caching of the CUDA binaries, since it does not give us any significant performance improvement - use-github-cache: "false" - linux-local-args: '["--toolkit"]' + name: ff + skipPush: true + # authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}' + + - name: setup nix develop shell + uses: nicknovitski/nix-develop@v1.1.0 + with: + arguments: "--accept-flake-config .#ci" - name: ccache uses: hendrikmuhs/ccache-action@v1.2 - - name: Install system dependencies - run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh + # - name: Install system dependencies + # run: FF_GPU_BACKEND=${{ matrix.gpu_backend }} .github/workflows/helpers/install_dependencies.sh # - name: Install conda and FlexFlow dependencies # uses: conda-incubator/setup-miniconda@v2 @@ -49,20 +51,7 @@ jobs: - name: Run cmake run: | - export CUDNN_DIR=/usr/local/cuda - export CUDA_DIR=/usr/local/cuda - export FF_HOME=$(pwd) - export FF_GPU_BACKEND=${{ matrix.gpu_backend }} - export FF_CUDA_ARCH=70 - n_build_cores=$(( $(nproc) cores_available -1 )) - if (( $n_build_cores < 1 )) ; then n_build_cores=1 ; fi - mkdir build - cd build - #if [[ "${FF_GPU_BACKEND}" == "cuda" ]]; then - # export FF_BUILD_ALL_EXAMPLES=ON - # export FF_BUILD_UNIT_TESTS=ON - #fi - ../config/config.linux -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_CUDA_COMPILER_LAUNCHER=ccache + .github/workflows/helpers/build_${{ matrix.gpu_backend }}.sh - name: Build utils run: | diff --git a/.gitmodules b/.gitmodules index e6068aa368..7ee487bb6a 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,18 +4,6 @@ [submodule "deps/nccl"] path = deps/nccl url = https://github.com/NVIDIA/nccl.git -[submodule "deps/pybind11"] - path = deps/pybind11 - url = https://github.com/pybind/pybind11.git -[submodule "deps/googletest"] - path = deps/googletest - url = https://github.com/google/googletest.git -[submodule "deps/variant"] - path = deps/variant - url = 
https://github.com/mpark/variant -[submodule "deps/optional"] - path = deps/optional - url = https://github.com/TartanLlama/optional.git [submodule "deps/json"] path = deps/json url = https://github.com/nlohmann/json.git @@ -37,9 +25,3 @@ [submodule "deps/fmt"] path = deps/fmt url = https://github.com/fmtlib/fmt.git -[submodule "deps/invoke"] - path = deps/invoke - url = https://github.com/BlackMATov/invoke.hpp.git -[submodule "deps/any"] - path = deps/any - url = https://github.com/thelink2012/any.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 418a2a7538..e04aa622c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,12 @@ cmake_minimum_required(VERSION 3.10) project(FlexFlow) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_LIST_DIR}/cmake) +set( + CMAKE_MODULE_PATH + ${CMAKE_MODULE_PATH} + ${CMAKE_CURRENT_LIST_DIR}/cmake + ${CMAKE_CURRENT_LIST_DIR}/cmake/Modules +) # Detect OS type and Linux version (if it applies) set(LINUX_VERSION "") @@ -28,7 +33,7 @@ set(FF_MAX_NUM_TASK_REGIONS "20" CACHE STRING set(FF_MAX_NUM_TASK_ARGUMENTS "5" CACHE STRING "Maximum number of arguments that can be declared in a TaskSignature") option(FF_USE_NCCL "Run FlexFlow with NCCL" OFF) -option(FF_USE_PREBUILT_NCCL "Enable use of NCCL pre-compiled library, if available" ON) +option(FF_USE_EXTERNAL_NCCL "Enable use of NCCL pre-compiled library, if available" ON) option(FF_USE_PREBUILT_LEGION "Enable use of Legion pre-compiled library, if available" ON) option(FF_USE_ALL_PREBUILT_LIBRARIES "Enable use of all pre-compiled libraries, if available" OFF) option(FF_USE_PYTHON "Enable Python" ON) @@ -77,20 +82,15 @@ include(nccl) # set_property(CACHE FF_GPU_BACKEND PROPERTY STRINGS ${FF_GPU_BACKENDS}) include(json) -include(optional) include(expected) include(spdlog) -include(variant) include(doctest) include(visit_struct) include(CTest) include(fmt) include(legion) include(rapidcheck) -include(invoke) -include(any) #include(gtest) -#include(fmt) include(flexflow-utils) diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake new file mode 100644 index 0000000000..796818c0cf --- /dev/null +++ b/cmake/Modules/FindNCCL.cmake @@ -0,0 +1,175 @@ +# from https://github.com/pytorch/pytorch/blob/818b14025a1d70872b52d28a1e83e7797f6e271a/cmake/Modules/FindNCCL.cmake + +################################################################################ +# +# From PyTorch: +# +# Copyright (c) 2016- Facebook, Inc (Adam Paszke) +# Copyright (c) 2014- Facebook, Inc (Soumith Chintala) +# Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert) +# Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu) +# Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu) +# Copyright (c) 2011-2013 NYU (Clement Farabet) +# Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston) +# Copyright (c) 2006 Idiap Research Institute (Samy Bengio) +# Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz) +# +# From Caffe2: +# +# Copyright (c) 2016-present, Facebook Inc. All rights reserved. +# +# All contributions by Facebook: +# Copyright (c) 2016 Facebook Inc. +# +# All contributions by Google: +# Copyright (c) 2015 Google Inc. +# All rights reserved. +# +# All contributions by Yangqing Jia: +# Copyright (c) 2015 Yangqing Jia +# All rights reserved. 
+# +# All contributions by Kakao Brain: +# Copyright 2019-2020 Kakao Brain +# +# All contributions by Cruise LLC: +# Copyright (c) 2022 Cruise LLC. +# All rights reserved. +# +# All contributions from Caffe: +# Copyright(c) 2013, 2014, 2015, the respective contributors +# All rights reserved. +# +# All other contributions: +# Copyright(c) 2015, 2016 the respective contributors +# All rights reserved. +# +# Caffe2 uses a copyright model similar to Caffe: each contributor holds +# copyright over their contributions to Caffe2. The project versioning records +# all such contribution and copyright details. If a contributor wants to further +# mark their specific copyright on a particular contribution, they should +# indicate their copyright solely in the commit message of the change when it is +# committed. +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America +# and IDIAP Research Institute nor the names of its contributors may be +# used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +################################################################################ + + +# Find the nccl libraries +# +# The following variables are optionally searched for defaults +# NCCL_ROOT: Base directory where all NCCL components are found +# NCCL_INCLUDE_DIR: Directory where NCCL header is found +# NCCL_LIB_DIR: Directory where NCCL library is found +# +# The following are set after configuration is done: +# NCCL_FOUND +# NCCL_INCLUDE_DIRS +# NCCL_LIBRARIES +# +# The path hints include CUDA_TOOLKIT_ROOT_DIR seeing as some folks +# install NCCL in the same location as the CUDA toolkit. +# See https://github.com/caffe2/caffe2/issues/1601 + +set(NCCL_INCLUDE_DIR $ENV{NCCL_INCLUDE_DIR} CACHE PATH "Folder contains NVIDIA NCCL headers") +set(NCCL_LIB_DIR $ENV{NCCL_LIB_DIR} CACHE PATH "Folder contains NVIDIA NCCL libraries") +set(NCCL_VERSION $ENV{NCCL_VERSION} CACHE STRING "Version of NCCL to build with") + +if ($ENV{NCCL_ROOT_DIR}) + message(WARNING "NCCL_ROOT_DIR is deprecated. 
Please set NCCL_ROOT instead.") +endif() +list(APPEND NCCL_ROOT $ENV{NCCL_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}) +# Compatible layer for CMake <3.12. NCCL_ROOT will be accounted in for searching paths and libraries for CMake >=3.12. +list(APPEND CMAKE_PREFIX_PATH ${NCCL_ROOT}) + +find_path(NCCL_INCLUDE_DIRS + NAMES nccl.h + HINTS ${NCCL_INCLUDE_DIR}) + +if (USE_STATIC_NCCL) + MESSAGE(STATUS "USE_STATIC_NCCL is set. Linking with static NCCL library.") + SET(NCCL_LIBNAME "nccl_static") + if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified + set(CMAKE_FIND_LIBRARY_SUFFIXES ".a.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +else() + SET(NCCL_LIBNAME "nccl") + if (NCCL_VERSION) # Prefer the versioned library if a specific NCCL version is specified + set(CMAKE_FIND_LIBRARY_SUFFIXES ".so.${NCCL_VERSION}" ${CMAKE_FIND_LIBRARY_SUFFIXES}) + endif() +endif() + +find_library(NCCL_LIBRARIES + NAMES ${NCCL_LIBNAME} + HINTS ${NCCL_LIB_DIR}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIRS NCCL_LIBRARIES) + +if(NCCL_FOUND) # obtaining NCCL version and some sanity checks + set (NCCL_HEADER_FILE "${NCCL_INCLUDE_DIRS}/nccl.h") + message (STATUS "Determining NCCL version from ${NCCL_HEADER_FILE}...") + set (OLD_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) + list (APPEND CMAKE_REQUIRED_INCLUDES ${NCCL_INCLUDE_DIRS}) + include(CheckCXXSymbolExists) + check_cxx_symbol_exists(NCCL_VERSION_CODE nccl.h NCCL_VERSION_DEFINED) + + if (NCCL_VERSION_DEFINED) + set(file "${PROJECT_BINARY_DIR}/detect_nccl_version.cc") + file(WRITE ${file} " + #include + #include + int main() + { + std::cout << NCCL_MAJOR << '.' << NCCL_MINOR << '.' << NCCL_PATCH << std::endl; + + int x; + ncclGetVersion(&x); + return x == NCCL_VERSION_CODE; + } +") + try_run(NCCL_VERSION_MATCHED compile_result ${PROJECT_BINARY_DIR} ${file} + RUN_OUTPUT_VARIABLE NCCL_VERSION_FROM_HEADER + CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=${NCCL_INCLUDE_DIRS}" + LINK_LIBRARIES ${NCCL_LIBRARIES}) + if (NOT NCCL_VERSION_MATCHED) + message(FATAL_ERROR "Found NCCL header version and library version do not match! 
\ +(include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES}) Please set NCCL_INCLUDE_DIR and NCCL_LIB_DIR manually.") + endif() + message(STATUS "NCCL version: ${NCCL_VERSION_FROM_HEADER}") + else() + message(STATUS "NCCL version < 2.3.5-5") + endif () + set (CMAKE_REQUIRED_INCLUDES ${OLD_CMAKE_REQUIRED_INCLUDES}) + + message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIRS}, library: ${NCCL_LIBRARIES})") + mark_as_advanced(NCCL_ROOT_DIR NCCL_INCLUDE_DIRS NCCL_LIBRARIES) +endif() diff --git a/cmake/any.cmake b/cmake/any.cmake deleted file mode 100644 index 9a6164da4f..0000000000 --- a/cmake/any.cmake +++ /dev/null @@ -1,16 +0,0 @@ -add_library( - any - INTERFACE -) -target_include_directories( - any - INTERFACE - ${CMAKE_CURRENT_SOURCE_DIR}/deps/any/ -) -set_target_properties( - any - PROPERTIES - CXX_STANDARD 11 - CXX_STANDARD_REQUIRED YES - CXX_EXTENSIONS NO -) diff --git a/cmake/fmt.cmake b/cmake/fmt.cmake index 9eeb85611c..283caad69d 100644 --- a/cmake/fmt.cmake +++ b/cmake/fmt.cmake @@ -1,5 +1,9 @@ include(aliasing) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/fmt) +if (FF_USE_EXTERNAL_FMT) + find_package(fmt REQUIRED) +else() + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/fmt) -alias_library(fmt fmt::fmt) + alias_library(fmt fmt::fmt) +endif() diff --git a/cmake/invoke.cmake b/cmake/invoke.cmake deleted file mode 100644 index 3ec406ed05..0000000000 --- a/cmake/invoke.cmake +++ /dev/null @@ -1,5 +0,0 @@ -include(aliasing) - -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/invoke) - -alias_library(invoke invoke.hpp::invoke.hpp) diff --git a/cmake/json.cmake b/cmake/json.cmake index 97d4e5f9f7..093ec51cdc 100644 --- a/cmake/json.cmake +++ b/cmake/json.cmake @@ -1,6 +1,12 @@ include(aliasing) -set(JSON_BuildTests OFF CACHE INTERNAL "") -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/json) +if (FF_USE_EXTERNAL_JSON) + find_package(nlohmann_json REQUIRED) -alias_library(json nlohmann_json::nlohmann_json) + alias_library(json nlohmann_json) +else() + set(JSON_BuildTests OFF CACHE INTERNAL "") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/json) + + alias_library(json nlohmann_json::nlohmann_json) +endif() diff --git a/cmake/nccl.cmake b/cmake/nccl.cmake index 12062958cd..e89bee04c6 100644 --- a/cmake/nccl.cmake +++ b/cmake/nccl.cmake @@ -1,109 +1,36 @@ -set(NCCL_NAME nccl_internal) -# set(NCCL_CUDA_ARCH "-gencode=arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}") -# message("NCCL_CUDA_ARCH: ${NCCL_CUDA_ARCH}") +include(aliasing) -set(NCCL_URL "") -if((FF_USE_PREBUILT_NCCL OR FF_USE_ALL_PREBUILT_LIBRARIES) AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "x86_64") - if(LINUX_VERSION MATCHES "20.04") - if (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL 
"https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-20.04_11.7.0.tar.gz") - endif() - elseif(LINUX_VERSION MATCHES "18.04") - if (CUDA_VERSION VERSION_EQUAL "10.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.1.243.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "10.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_10.2.89.tar.gz") - elseif (CUDA_VERSION VERSION_EQUAL "11.0") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.0.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.1") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.1.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.2") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.2.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.3") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.3.1.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.4") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.4.3.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.5") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.5.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.6") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.6.2.tar.gz") - elseif(CUDA_VERSION VERSION_EQUAL "11.7") - set(NCCL_URL "https://github.com/flexflow/flexflow-third-party/releases/latest/download/nccl_ubuntu-18.04_11.7.0.tar.gz") - endif() - endif() -endif() - -if(NCCL_URL) - # Download and import pre-compiled NCCL library - message(STATUS "Using pre-compiled NCCL library") - message(STATUS "NCCL_URL: ${NCCL_URL}") - - include(FetchContent) - FetchContent_Declare(${NCCL_NAME} - URL ${NCCL_URL} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - ) - FetchContent_GetProperties(${NCCL_NAME}) - if(NOT ${NCCL_NAME}_POPULATED) - FetchContent_Populate(${NCCL_NAME}) - endif() - - set(NCCL_FOLDER_PATH ${${NCCL_NAME}_SOURCE_DIR}/deps/nccl) - set(NCCL_INCLUDE_DIR ${NCCL_FOLDER_PATH}/include) - set(NCCL_LIB_DIR ${NCCL_FOLDER_PATH}/lib) - message(STATUS "NCCL library path: ${NCCL_FOLDER_PATH}") - add_library(nccl SHARED IMPORTED) - set_target_properties(nccl PROPERTIES IMPORTED_LOCATION ${NCCL_FOLDER_PATH}) +add_library(nccl INTERFACE) - list(APPEND FLEXFLOW_INCLUDE_DIRS ${NCCL_INCLUDE_DIR}) - list(APPEND FLEXFLOW_EXT_LIBRARIES ${NCCL_LIB_DIR}/libnccl${LIBEXT}) - install(DIRECTORY ${NCCL_INCLUDE_DIR}/ DESTINATION include) - install(DIRECTORY ${NCCL_LIB_DIR}/ DESTINATION lib PATTERN "pkgconfig" EXCLUDE) - - set(NCCL_LIB "${INSTALL_DIR}/lib/libnccl${LIBEXT}") +if (FF_USE_EXTERNAL_NCCL) + find_package(NCCL REQUIRED) else() - # Build NCCL from source message(STATUS "Building NCCL from source") list(TRANSFORM CUDA_GENCODE PREPEND "NVCC_GENCODE=" OUTPUT_VARIABLE NCCL_BUILD_NVCC_GENCODE) - 
include(ExternalProject) - ExternalProject_Add(${NCCL_NAME} - SOURCE_DIR ${PROJECT_SOURCE_DIR}/deps/nccl - PREFIX ${CMAKE_BINARY_DIR}/deps/nccl - INSTALL_DIR ${CMAKE_BINARY_DIR}/deps/nccl - BUILD_BYPRODUCTS ${CMAKE_BINARY_DIR}/deps/nccl/lib/libnccl${LIBEXT} - INSTALL_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/nccl" "CXX=${CMAKE_CXX_COMPILER} -w" CC="${CMAKE_CC_COMPILER}" - BUILD_IN_SOURCE 1 + ExternalProject_Add(nccl_source_build + SOURCE_DIR ${PROJECT_SOURCE_DIR}/deps/${NCCL_NAME} + PREFIX ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} + INSTALL_DIR ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME} + BUILD_BYPRODUCTS ${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}/lib/libnccl${LIBEXT} + INSTALL_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND make src.build "${NCCL_BUILD_NVCC_GENCODE}" "CUDA_HOME=${CUDA_TOOLKIT_ROOT_DIR}" "BUILDDIR=${CMAKE_BINARY_DIR}/deps/${NCCL_NAME}" + BUILD_IN_SOURCE 1 ) - ExternalProject_Get_Property(${NCCL_NAME} INSTALL_DIR) - message(STATUS "NCCL install dir: ${INSTALL_DIR}") - set_directory_properties(PROPERTIES ADDITIONAL_CLEAN_FILES "${CMAKE_BINARY_DIR}/deps/nccl/lib/") + ExternalProject_Get_Property(nccl_source_build INSTALL_DIR) + set_directory_properties(PROPERTIES ADDITIONAL_CLEAN_FILES "${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/") + + install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/include/ DESTINATION include) + install(DIRECTORY ${CMAKE_BINARY_DIR}/deps/nccl_source_build/lib/ DESTINATION lib PATTERN "pkgconfig" EXCLUDE) set(NCCL_INCLUDE_DIR "${INSTALL_DIR}/include") - set(NCCL_LIB "${INSTALL_DIR}/lib/libnccl${LIBEXT}") + set(NCCL_LIBRARIES "${INSTALL_DIR}/lib/libnccl${LIBEXT}") + + add_dependencies(nccl nccl_source_build) endif() -message("NCCL_LIB = ${NCCL_LIB}") -message("INSTALL_DIR = ${INSTALL_DIR}") -add_library(nccl INTERFACE) -target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIR}) -add_dependencies(nccl ${NCCL_NAME}) -target_link_libraries(nccl INTERFACE ${NCCL_LIB}) +message(STATUS "NCCL_LIBRARIES = ${NCCL_LIBRARIES}") +target_include_directories(nccl SYSTEM INTERFACE ${NCCL_INCLUDE_DIRS}) +target_link_libraries(nccl INTERFACE ${NCCL_LIBRARIES}) diff --git a/cmake/optional.cmake b/cmake/optional.cmake deleted file mode 100644 index afaa6330c0..0000000000 --- a/cmake/optional.cmake +++ /dev/null @@ -1,4 +0,0 @@ -set(OPTIONAL_BUILD_TESTS OFF) -set(OPTIONAL_BUILD_PACKAGE OFF) - -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/optional) diff --git a/cmake/spdlog.cmake b/cmake/spdlog.cmake index a0d36fc3b2..cd18944460 100644 --- a/cmake/spdlog.cmake +++ b/cmake/spdlog.cmake @@ -1,5 +1,9 @@ include(aliasing) -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/spdlog) +if (FF_USE_EXTERNAL_SPDLOG) + find_package(spdlog REQUIRED) +else() + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/spdlog) -alias_library(spdlog spdlog::spdlog) + alias_library(spdlog spdlog::spdlog) +endif() diff --git a/cmake/variant.cmake b/cmake/variant.cmake deleted file mode 100644 index ddf5781281..0000000000 --- a/cmake/variant.cmake +++ /dev/null @@ -1,5 +0,0 @@ -include(aliasing) - -add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/deps/variant) - -alias_library(variant mpark_variant) diff --git a/config/config.linux b/config/config.linux index 2b87ec0eb5..94cb348a5a 100755 --- a/config/config.linux +++ b/config/config.linux @@ -1,4 +1,4 @@ -#!/bin/bash +#! 
/usr/bin/env bash # set the CC and CXX, usually it is not needed as cmake can detect it # set CC and CXX to mpicc and mpic++ when enable gasnet diff --git a/deps/any b/deps/any deleted file mode 160000 index e88b1bfc16..0000000000 --- a/deps/any +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e88b1bfc160fa9b01e6174dd29c812eeeece3be9 diff --git a/deps/googletest b/deps/googletest deleted file mode 160000 index 2fe3bd994b..0000000000 --- a/deps/googletest +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2fe3bd994b3189899d93f1d5a881e725e046fdc2 diff --git a/deps/invoke b/deps/invoke deleted file mode 160000 index 2c1eabc2e2..0000000000 --- a/deps/invoke +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 2c1eabc2e20ab02961f95c704ff0c0818671ddd1 diff --git a/deps/optional b/deps/optional deleted file mode 160000 index c28fcf74d2..0000000000 --- a/deps/optional +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c28fcf74d207fc667c4ed3dbae4c251ea551c8c1 diff --git a/deps/pybind11 b/deps/pybind11 deleted file mode 160000 index 8de7772cc7..0000000000 --- a/deps/pybind11 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8de7772cc72daca8e947b79b83fea46214931604 diff --git a/deps/variant b/deps/variant deleted file mode 160000 index 23cb94f027..0000000000 --- a/deps/variant +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 23cb94f027d4ef33bf48133acc2695c7e5c6f1e7 diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000..205d2b2290 --- /dev/null +++ b/flake.lock @@ -0,0 +1,60 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1689068808, + "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1710162809, + "narHash": "sha256-i2R2bcnQp+85de67yjgZVvJhd6rRnJbSYNpGmB6Leb8=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ddcd7598b2184008c97e6c9c6a21c5f37590b8d2", + "type": "github" + }, + "original": { + "id": "nixpkgs", + "ref": "nixos-23.11", + "type": "indirect" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000..3d357ca86c --- /dev/null +++ b/flake.nix @@ -0,0 +1,99 @@ +{ + description = "A framework for automatic performance optimization of DNN training and inference"; + + nixConfig = { + bash-prompt-prefix = "(ff) "; + extra-substituters = [ + "https://ff.cachix.org" + "https://cuda-maintainers.cachix.org/" + ]; + extra-trusted-public-keys = [ + "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=" + "ff.cachix.org-1:/kyZ0w35ToSJBjpiNfPLrL3zTjuPkUiqf2WH0GIShXM=" + ]; + }; + + # Nixpkgs / NixOS version to use. + inputs = { + nixpkgs.url = "nixpkgs/nixos-23.11"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils, ... 
}: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system: + let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + + mkShell = pkgs.mkShell.override { + stdenv = pkgs.llvmPackages.libcxxStdenv; + }; + in + { + packages = { + legion = pkgs.callPackage ./.flake/pkgs/legion.nix { }; + }; + + devShells = rec { + ci = mkShell { + buildInputs = (with pkgs; [ + llvmPackages_17.clang + cmakeCurses + gcc10Stdenv + gcc10 + ccache + cudatoolkit + zlib + pkg-config + python3 + self.packages.${system}.legion + nlohmann_json + spdlog + range-v3 + rapidcheck + doctest + fmt + cudaPackages.cuda_nvcc + cudaPackages.cudnn + cudaPackages.nccl + cudaPackages.libcublas + cudaPackages.cuda_cudart + ]) ++ (with pkgs.python3Packages; [ + ]); + }; + + default = mkShell { + inputsFrom = [ ci ]; + + buildInputs = builtins.concatLists [ + (with pkgs; [ + clang-tools_17 + gh-markdown-preview + plantuml + gdb + ruff + compdb + jq + gh + ]) + (with pkgs.python3Packages; [ + gitpython + ipython + mypy + python-lsp-server + pylsp-mypy + python-lsp-ruff + pygithub + sqlitedict + frozendict + black + toml + ]) + ]; + }; + }; + } + ); +} +# vim: set tabstop=2 shiftwidth=2 expandtab: diff --git a/lib/kernels/CMakeLists.txt b/lib/kernels/CMakeLists.txt index 59c7d44b60..a963c7b49b 100644 --- a/lib/kernels/CMakeLists.txt +++ b/lib/kernels/CMakeLists.txt @@ -1,12 +1,14 @@ set(project_target kernels) project(${project_target} - LANGUAGES CUDA) + LANGUAGES CXX CUDA) file(GLOB_RECURSE SRC CONFIGURE_DEPENDS LIST_DIRECTORIES False - src/*.cc) + src/*.cc + # src/*.cu + ) add_library( ${project_target} @@ -16,7 +18,7 @@ add_library( target_include_directories( ${project_target} PRIVATE - src/cuda/ + src/ PUBLIC include/ ) @@ -28,10 +30,10 @@ target_link_libraries( nccl ) -define_ff_vars(kernels) +define_ff_vars(${project_target}) set_target_properties( ${project_target} PROPERTIES - CUDA_STANDARD 11 + CUDA_STANDARD 17 ) diff --git a/lib/kernels/include/kernels/array_shape.h b/lib/kernels/include/kernels/array_shape.h index 41c8275b1c..0a0124e13c 100644 --- a/lib/kernels/include/kernels/array_shape.h +++ b/lib/kernels/include/kernels/array_shape.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_KERNELS_ARRAY_SHAPE_H #include "legion_dim.h" -#include "utils/optional.h" #include "utils/stack_vector.h" #include "utils/visitable.h" #include @@ -36,11 +35,11 @@ struct ArrayShape { legion_dim_t last_idx() const; legion_dim_t neg_idx(int) const; - optional at_maybe(std::size_t) const; + std::optional at_maybe(std::size_t) const; ArrayShape reversed_dim_order() const; - ArrayShape sub_shape(optional start, - optional end); + ArrayShape sub_shape(std::optional start, + std::optional end); public: LegionTensorDims dims; diff --git a/lib/kernels/include/kernels/device.h b/lib/kernels/include/kernels/device.h index b1571da1b3..652c59e976 100644 --- a/lib/kernels/include/kernels/device.h +++ b/lib/kernels/include/kernels/device.h @@ -22,6 +22,7 @@ #endif #include +#include #include #include diff --git a/lib/kernels/include/kernels/perf_metrics.h b/lib/kernels/include/kernels/perf_metrics.h index 1a3d2509d0..c4a34e4f79 100644 --- a/lib/kernels/include/kernels/perf_metrics.h +++ b/lib/kernels/include/kernels/perf_metrics.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_KERNELS_INCLUDE_KERNELS_PERF_METRICS_H #include "utils/fmt.h" -#include "utils/optional.h" #include "utils/visitable.h" namespace FlexFlow { @@ -11,23 +10,24 @@ struct PerfMetrics : public use_visitable_cmp { PerfMetrics() = delete; PerfMetrics(double start_time); PerfMetrics(int 
train_all, - optional train_correct, - optional cce_loss, - optional sparse_cce_loss, - optional mse_loss, - optional rmse_loss, - optional mae_loss, + std::optional train_correct, + std::optional cce_loss, + std::optional sparse_cce_loss, + std::optional mse_loss, + std::optional rmse_loss, + std::optional mae_loss, double start_time_micro, double current_time_micro); - int train_all = 0; // measure_accuracy_denominator - optional train_correct = 0; // measure_accuracy numerator - optional cce_loss = nullopt; // measure_categorical_crossentropy - optional sparse_cce_loss = - 0.0f; // measure_sparse_categorical_crossentropy - optional mse_loss = 0.0f; // measure_mean_squared_error - optional rmse_loss = 0.0f; // measure_root_mean_squared_error - optional mae_loss = 0.0f; // measure_mean_absolute_error + int train_all = 0; // measure_accuracy_denominator + std::optional train_correct = 0; // measure_accuracy numerator + std::optional cce_loss = + std::nullopt; // measure_categorical_crossentropy + std::optional sparse_cce_loss = + 0.0f; // measure_sparse_categorical_crossentropy + std::optional mse_loss = 0.0f; // measure_mean_squared_error + std::optional rmse_loss = 0.0f; // measure_root_mean_squared_error + std::optional mae_loss = 0.0f; // measure_mean_absolute_error double start_time; double current_time; }; diff --git a/lib/kernels/src/cuda/batch_norm_kernels.cu b/lib/kernels/src/cuda/batch_norm_kernels.cu index 8c5ea76f73..6529351a77 100644 --- a/lib/kernels/src/cuda/batch_norm_kernels.cu +++ b/lib/kernels/src/cuda/batch_norm_kernels.cu @@ -13,8 +13,11 @@ * limitations under the License. */ +#include "device.h" +#include "kernels/allocation.h" #include "kernels/batch_norm_kernels.h" -#include "kernels/cuda_helper.h" +#include "kernels/device.h" +#include "kernels/ff_handle.h" namespace FlexFlow { namespace Kernels { @@ -86,7 +89,7 @@ void backward_kernel(cudaStream_t stream, m->saveVar)); } -BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handler, +BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handle, Allocator allocator, float *runningMean, int output_n, @@ -125,19 +128,18 @@ BatchNormPerDeviceState init_kernel(PerDeviceFFHandle handler, checkCUDNN(cudnnSetTensor4dDescriptor( biasTensor, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, output_c, 1, 1)); // allocate memory for runningMean, runningVar, saveMean, saveVar - { - size_t totalSize = sizeof(float) * output_c * 4; - runningMean = (float *)allocator.allocate(totalSize); - float *runningVar = (float *)runningMean + output_c; - float *saveMean = (float *)runningVar + output_c; - float *saveVar = (float *)saveMean + output_c; - cudaStream_t stream; + size_t totalSize = sizeof(float) * output_c * 4; + runningMean = (float *)allocator.allocate(totalSize); + float *runningVar = (float *)runningMean + output_c; + float *saveMean = (float *)runningVar + output_c; + float *saveVar = (float *)saveMean + output_c; + cudaStream_t stream; + + assign_kernel<<>>( + runningMean, output_c, 0.0f); + assign_kernel<<>>( + runningVar, output_c, 0.0f); - assign_kernel<<>>( - runningMean, output_c, 0.0f); - assign_kernel<<>>( - runningVar, output_c, 0.0f); - } if (relu) { checkCUDNN(cudnnCreateActivationDescriptor(&actiDesc)); checkCUDNN(cudnnSetActivationDescriptor( diff --git a/lib/kernels/src/cuda/cast_kernels.cu b/lib/kernels/src/cuda/cast_kernels.cu index 3d8804862d..e7716ee06b 100644 --- a/lib/kernels/src/cuda/cast_kernels.cu +++ b/lib/kernels/src/cuda/cast_kernels.cu @@ -59,7 +59,8 @@ struct BackwardKernel { } }; -void 
forward_kernel(ffStream_t stream, +void forward_kernel(PerDeviceFFHandle handle, + ffStream_t stream, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output, DataType input_type, @@ -68,7 +69,8 @@ void forward_kernel(ffStream_t stream, input_type, output_type, stream, handle, input, output); } -void backward_kernel(ffStream_t stream, +void backward_kernel(PerDeviceFFHandle handle, + ffStream_t stream, GenericTensorAccessorR const &input, GenericTensorAccessorW const &output, DataType input_type, diff --git a/lib/kernels/src/device.h b/lib/kernels/src/device.h index 5c4239a5cf..00f2888f45 100644 --- a/lib/kernels/src/device.h +++ b/lib/kernels/src/device.h @@ -5,6 +5,7 @@ #include "kernels/device.h" #include "op-attrs/datatype.h" #include "op-attrs/op.h" +#include #if defined(FF_USE_CUDA) #include diff --git a/lib/kernels/src/perf_metrics.cc b/lib/kernels/src/perf_metrics.cc index 07bb8de815..2036ddd35a 100644 --- a/lib/kernels/src/perf_metrics.cc +++ b/lib/kernels/src/perf_metrics.cc @@ -6,12 +6,12 @@ PerfMetrics::PerfMetrics(double _start_time) : start_time(_start_time), current_time(_start_time) {} PerfMetrics::PerfMetrics(int _train_all, - optional _train_correct, - optional _cce_loss, - optional _sparse_cce_loss, - optional _mse_loss, - optional _rmse_loss, - optional _mae_loss, + std::optional _train_correct, + std::optional _cce_loss, + std::optional _sparse_cce_loss, + std::optional _mse_loss, + std::optional _rmse_loss, + std::optional _mae_loss, double _start_time_micro, double _current_time_micro) : train_all(_train_all), train_correct(_train_correct), cce_loss(_cce_loss), @@ -29,7 +29,7 @@ float get_accuracy(PerfMetrics const &m) { PerfMetrics update(PerfMetrics const &lhs, PerfMetrics const &rhs) { PerfMetrics out(lhs); - auto update_val = [](optional &l, optional const &r) { + auto update_val = [](std::optional &l, std::optional const &r) { if (l.has_value()) { l.value() += r.value(); } @@ -52,7 +52,7 @@ PerfMetrics update(PerfMetrics const &lhs, PerfMetrics const &rhs) { PerfMetrics apply_scale(PerfMetrics const &pm, float scale) { PerfMetrics out(pm); - auto scale_val = [&](optional &l) { + auto scale_val = [&](std::optional &l) { if (l.has_value()) { l.value() *= scale; } diff --git a/lib/op-attrs/include/op-attrs/datatype.h b/lib/op-attrs/include/op-attrs/datatype.h index 2417f37fdb..4a8de665b4 100644 --- a/lib/op-attrs/include/op-attrs/datatype.h +++ b/lib/op-attrs/include/op-attrs/datatype.h @@ -3,7 +3,7 @@ #include "utils/fmt.h" #include "utils/fp16.h" -#include "utils/variant.h" +#include namespace FlexFlow { @@ -50,12 +50,12 @@ typename data_type_enum_to_class
::type cast_to(T t) { template using real_type = typename data_type_enum_to_class
::type; -using DataTypeValue = variant, - real_type, - real_type, - real_type, - real_type, - real_type>; +using DataTypeValue = std::variant, + real_type, + real_type, + real_type, + real_type, + real_type>; size_t size_of(DataType); diff --git a/lib/op-attrs/include/op-attrs/get_op_type.h b/lib/op-attrs/include/op-attrs/get_op_type.h index 421c464843..a2db4ab5f0 100644 --- a/lib/op-attrs/include/op-attrs/get_op_type.h +++ b/lib/op-attrs/include/op-attrs/get_op_type.h @@ -45,7 +45,7 @@ struct GetOpTypeFunctor { }; template -OperatorType get_op_type(variant const &attrs) { +OperatorType get_op_type(std::variant const &attrs) { return visit(GetOpTypeFunctor{}, attrs); } diff --git a/lib/op-attrs/include/op-attrs/get_output_shapes.h b/lib/op-attrs/include/op-attrs/get_output_shapes.h index 5f78ec2d3f..6fb93aac91 100644 --- a/lib/op-attrs/include/op-attrs/get_output_shapes.h +++ b/lib/op-attrs/include/op-attrs/get_output_shapes.h @@ -177,19 +177,19 @@ struct GetOutputShapesFunctor { template std::vector - get_output_shapes(variant const &t, + get_output_shapes(std::variant const &t, std::vector const &s) { return get_output_shape(GetOutputShapesFunctor{s}, t); } template -typename std::enable_if::value, optional>::type +typename std::enable_if::value, std::optional>::type get_num_outputs(T const &) { - return nullopt; + return std::nullopt; } template -typename std::enable_if::value, optional>::type +typename std::enable_if::value, std::optional>::type get_num_outputs(T const &) { return 1; } diff --git a/lib/op-attrs/include/op-attrs/operator_attrs.h b/lib/op-attrs/include/op-attrs/operator_attrs.h index a7ba84624c..9da787cbf8 100644 --- a/lib/op-attrs/include/op-attrs/operator_attrs.h +++ b/lib/op-attrs/include/op-attrs/operator_attrs.h @@ -35,31 +35,31 @@ namespace FlexFlow { -using SharedOperatorAttrs = variant; +using SharedOperatorAttrs = std::variant; static_assert(is_valid_opattr::value, ""); static_assert(is_valid_opattr::value, ""); @@ -85,11 +85,11 @@ static_assert(is_valid_opattr::value, ""); static_assert(is_valid_opattr::value, ""); static_assert(is_valid_opattr::value, ""); -using ParallelOperatorAttrs = +using ParallelOperatorAttrs = std:: variant; using ComputationGraphAttrs = - variant_join>; + variant_join>; using CompGraphOperatorAttrs = ComputationGraphAttrs; using PCGOperatorAttrs = diff --git a/lib/op-attrs/include/op-attrs/ops/conv_2d.h b/lib/op-attrs/include/op-attrs/ops/conv_2d.h index 3034dc8c62..79980d545d 100644 --- a/lib/op-attrs/include/op-attrs/ops/conv_2d.h +++ b/lib/op-attrs/include/op-attrs/ops/conv_2d.h @@ -10,9 +10,9 @@ namespace FlexFlow { struct Conv2DAttrs { - req out_channels, kernel_h, kernel_w, stride_h, stride_w, padding_h, + int out_channels, kernel_h, kernel_w, stride_h, stride_w, padding_h, padding_w, groups; - req> activation; + std::optional activation; req use_bias; }; diff --git a/lib/op-attrs/include/op-attrs/ops/linear.h b/lib/op-attrs/include/op-attrs/ops/linear.h index 3be8be2040..2c27b09f7c 100644 --- a/lib/op-attrs/include/op-attrs/ops/linear.h +++ b/lib/op-attrs/include/op-attrs/ops/linear.h @@ -21,14 +21,14 @@ struct L2RegularizerAttrs { FF_VISITABLE_STRUCT(L2RegularizerAttrs, lambda); CHECK_VALID_OP_ATTR(L2RegularizerAttrs); -using RegularizerAttrs = variant; +using RegularizerAttrs = std::variant; struct LinearAttrs { - req out_channels; - req use_bias; - req data_type; - req activation; - req> regularizer; + int out_channels; + bool use_bias; + DataType data_type; + Activation activation; + req> regularizer; }; FF_VISITABLE_STRUCT( 
LinearAttrs, out_channels, use_bias, data_type, activation, regularizer); diff --git a/lib/op-attrs/include/op-attrs/ops/loss_functions.h b/lib/op-attrs/include/op-attrs/ops/loss_functions.h index 7a3db05329..58d372d9e5 100644 --- a/lib/op-attrs/include/op-attrs/ops/loss_functions.h +++ b/lib/op-attrs/include/op-attrs/ops/loss_functions.h @@ -3,8 +3,8 @@ #include "core.h" #include "utils/exception.h" -#include "utils/variant.h" #include "utils/visitable.h" +#include namespace FlexFlow { @@ -31,7 +31,7 @@ FF_VISITABLE_STRUCT(OtherLossAttrs, loss_type); CHECK_VALID_OP_ATTR(OtherLossAttrs); using LossAttrs = - variant; + std::variant; LossFunction get_loss_function(OtherLossAttrs const &); LossFunction get_loss_function(SparseCategoricalCrossEntropyLossAttrs const &); diff --git a/lib/op-attrs/src/operator_attrs.cc b/lib/op-attrs/src/operator_attrs.cc index 16f410f870..a524ab3d14 100644 --- a/lib/op-attrs/src/operator_attrs.cc +++ b/lib/op-attrs/src/operator_attrs.cc @@ -166,8 +166,8 @@ struct AsDot { }; template -RecordFormatter as_dot(variant const &o) { - return mpark::visit(AsDot{}, o); +RecordFormatter as_dot(std::variant const &o) { + return std::visit(AsDot{}, o); } struct IsValidFunctor { diff --git a/lib/op-attrs/src/parallel_dim_mapping_record.cc b/lib/op-attrs/src/parallel_dim_mapping_record.cc index a5fa6823e9..5e734e88cd 100644 --- a/lib/op-attrs/src/parallel_dim_mapping_record.cc +++ b/lib/op-attrs/src/parallel_dim_mapping_record.cc @@ -13,7 +13,7 @@ ParallelDimMappingRecord ParallelDimMappingRecord::input_output_record( int input_dim, int output_idx, int output_dim, - tl::optional operation) { + std::optional operation) { ParallelDimMappingRecord r(MappingRecordType::INPUT_OUTPUT); r.operation = operation; @@ -36,7 +36,7 @@ ParallelDimMappingRecord ParallelDimMappingRecord::input_weight_record( int input_dim, int weight_idx, int weight_dim, - tl::optional operation) { + std::optional operation) { ParallelDimMappingRecord r(MappingRecordType::INPUT_WEIGHT); r.operation = operation; diff --git a/lib/op-attrs/src/parallel_dim_mapping_record.h b/lib/op-attrs/src/parallel_dim_mapping_record.h index c0f325ab7e..c37ac79b40 100644 --- a/lib/op-attrs/src/parallel_dim_mapping_record.h +++ b/lib/op-attrs/src/parallel_dim_mapping_record.h @@ -1,7 +1,6 @@ #ifndef _FLEXFLOW_OP_META_SRC_PARELLEL_DIM_MAPPING_RECORD_H #define _FLEXFLOW_OP_META_SRC_PARELLEL_DIM_MAPPING_RECORD_H -#include "tl/optional.hpp" #include "utils/visitable.h" #include @@ -23,18 +22,18 @@ class ParallelDimMappingRecord { int input_dim, int output_idx, int output_dim, - tl::optional operation = tl::nullopt); + std::optional operation = std::nullopt); static ParallelDimMappingRecord input_weight_record( int input_idx, int input_dim, int weight_idx, int weight_dim, - tl::optional operation = tl::nullopt); + std::optional operation = std::nullopt); MappingRecordType get_type() const; public: MappingRecordType type; - tl::optional operation; + std::optional operation; int output_dim, input_dim, weight_dim; int output_idx, input_idx, weight_idx; diff --git a/lib/op-attrs/src/parallel_dim_mapping_record_solver.cc b/lib/op-attrs/src/parallel_dim_mapping_record_solver.cc index 68686393f5..11cfbc125c 100644 --- a/lib/op-attrs/src/parallel_dim_mapping_record_solver.cc +++ b/lib/op-attrs/src/parallel_dim_mapping_record_solver.cc @@ -44,7 +44,7 @@ ParallelDimMappingRecord int output_dim, int input_idx, int output_idx, - tl::optional operation) { + std::optional operation) { NOT_IMPLEMENTED(); } @@ -53,7 +53,7 @@ 
ParallelDimMappingRecord int weight_dim, int input_idx, int weight_idx, - tl::optional operation) { + std::optional operation) { NOT_IMPLEMENTED(); } /* int get_output_to_input_dim_mapping(ParallelTensorShape const &output, */ @@ -209,7 +209,7 @@ void construct_weight_parallel_dims( int weight_dim, int input_idx, int weight_idx, - tl::optional operation) { + std::optional operation) { records.push_back(ParallelDimMappingRecord::input_weight_record( input_idx, input_dim, weight_idx, weight_dim, operation)); } @@ -274,7 +274,7 @@ void construct_output_parallel_dims( int output_dim, int input_idx, int output_idx, - tl::optional operation) { + std::optional operation) { records.push_back(ParallelDimMappingRecord::input_output_record( input_idx, input_dim, output_idx, output_dim, operation)); } diff --git a/lib/op-attrs/src/parallel_dim_mapping_record_solver.h b/lib/op-attrs/src/parallel_dim_mapping_record_solver.h index d28cd419ca..a46192edeb 100644 --- a/lib/op-attrs/src/parallel_dim_mapping_record_solver.h +++ b/lib/op-attrs/src/parallel_dim_mapping_record_solver.h @@ -29,7 +29,7 @@ ParallelDimMappingRecord construct_weight_parallel_dims( int weight_dim, int input_idx = 0, int weight_idx = 0, - tl::optional operation = tl::nullopt); + std::optional operation = std::nullopt); std::vector construct_output_parallel_dims(std::vector> mappings, @@ -44,7 +44,7 @@ ParallelDimMappingRecord construct_output_parallel_dims( int output_dim, int input_idx = 0, int output_idx = 0, - tl::optional operation = tl::nullopt); + std::optional operation = std::nullopt); struct ParallelDimMappingSolution { std::vector weight_shapes; @@ -85,7 +85,7 @@ ParallelDimMappingSolution solve_parallel_dim_mappings( /* int weight_dim, */ /* int input_idx = 0, */ /* int weight_idx = 0, */ -/* tl::optional operation = tl::nullopt); */ +/* std::optional operation = std::nullopt); */ /* void register_output_parallel_dims( */ /* std::vector> mappings, */ /* int input_idx = 0, */ @@ -95,7 +95,7 @@ ParallelDimMappingSolution solve_parallel_dim_mappings( /* int output_dim, */ /* int input_idx = 0, */ /* int output_idx = 0, */ -/* tl::optional operation = tl::nullopt); */ +/* std::optional operation = std::nullopt); */ /* private: */ /* std::vector *parallel_dims_mapping; */ diff --git a/lib/pcg/include/pcg/computation_graph_builder.h b/lib/pcg/include/pcg/computation_graph_builder.h index ae937c590d..035f0cad0b 100644 --- a/lib/pcg/include/pcg/computation_graph_builder.h +++ b/lib/pcg/include/pcg/computation_graph_builder.h @@ -12,101 +12,114 @@ struct ComputationGraphBuilder // C++ APIs for constructing models // Add an exp layer - Tensor exp(Tensor const &, optional const &name = nullopt); + Tensor exp(Tensor const &, + std::optional const &name = std::nullopt); // Add an add layer Tensor add(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a subtract layer Tensor subtract(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a multiply layer Tensor multiply(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a divide layer Tensor divide(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a max layer Tensor max(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a min layer Tensor 
min(Tensor const &x, Tensor const &y, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a rsqrt layer - Tensor rsqrt(Tensor const &x, optional const &name = nullopt); + Tensor rsqrt(Tensor const &x, + std::optional const &name = std::nullopt); // Add a pow layer Tensor pow(Tensor const &x, float exponent, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a scalar multiply layer Tensor scalar_multiply(Tensor const &x, float scalar, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor scalar_add(Tensor const &x, float scalar, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor scalar_sub(Tensor const &lhs, float rhs, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor scalar_truediv(Tensor const &numerator, float denominator, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a sin layer - Tensor sin(Tensor const &x, optional const &name = nullopt); + Tensor sin(Tensor const &x, + std::optional const &name = std::nullopt); // Add a cos layer - Tensor cos(Tensor const &x, optional const &name = nullopt); + Tensor cos(Tensor const &x, + std::optional const &name = std::nullopt); // Add an activation layer - Tensor relu(Tensor const &x, optional const &name = nullopt); - Tensor identity(Tensor const &x, optional const &name = nullopt); - Tensor gelu(Tensor const &x, optional const &name = nullopt); - Tensor sigmoid(Tensor const &x, optional const &name = nullopt); - Tensor tanh(Tensor const &x, optional const &name = nullopt); - Tensor elu(Tensor const &x, optional const &name = nullopt); + Tensor relu(Tensor const &x, + std::optional const &name = std::nullopt); + Tensor identity(Tensor const &x, + std::optional const &name = std::nullopt); + Tensor gelu(Tensor const &x, + std::optional const &name = std::nullopt); + Tensor sigmoid(Tensor const &x, + std::optional const &name = std::nullopt); + Tensor tanh(Tensor const &x, + std::optional const &name = std::nullopt); + Tensor elu(Tensor const &x, + std::optional const &name = std::nullopt); // Add a 2D convolutional layer - Tensor conv2d(Tensor const &input, - int outChannels, - int kernelH, - int kernelW, - int strideH, - int strideW, - int paddingH, - int paddingW, - optional const &activation = nullopt, - int groups = 1, - bool use_bias = true, - optional kernel_initializer = nullopt, - optional bias_initializer = nullopt, - optional kernel_regularizer = nullopt, - optional const &name = nullopt); + Tensor conv2d( + Tensor const &input, + int outChannels, + int kernelH, + int kernelW, + int strideH, + int strideW, + int paddingH, + int paddingW, + std::optional const &activation = std::nullopt, + int groups = 1, + bool use_bias = true, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &bias_initializer = std::nullopt, + std::optional const &kernel_regularizer = std::nullopt, + std::optional const &name = std::nullopt); // Add a dropout layer Tensor dropout(Tensor const &input, float rate, unsigned long long seed = 0, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add an embedding layer - Tensor embedding(Tensor const &input, - int num_entries, - int outDim, - AggregateOp aggr, - DataType dtype = DataType::FLOAT, - optional kernel_initializer = nullopt, - optional const &name = nullopt); + Tensor embedding( + Tensor const &input, + int num_entries, + int outDim, + 
AggregateOp aggr, + DataType dtype = DataType::FLOAT, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &name = std::nullopt); // Add a gather layer - std::vector gather(Tensor const &input, - Tensor const &index, - ff_dim_t dim, - optional const &name = nullopt); + std::vector + gather(Tensor const &input, + Tensor const &index, + ff_dim_t dim, + std::optional const &name = std::nullopt); // Add a cache layer Tensor cache(Tensor const &input, int num_batches, std::function score_f = {}, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a 2D pooling layer Tensor pool2d(Tensor const &input, int kernelH, @@ -116,38 +129,39 @@ struct ComputationGraphBuilder int paddingH, int paddingW, PoolOp type = PoolOp::MAX, - optional const &activation = nullopt, - optional const &name = nullopt); + std::optional const &activation = std::nullopt, + std::optional const &name = std::nullopt); Tensor layer_norm(Tensor const &input, std::vector const &axes, bool elementwise_affine, float eps, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor batch_norm(Tensor const &input, bool relu = true, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor batch_matmul(Tensor const &A, Tensor const &B, int a_seq_length_dim = -1, int b_seq_length_dim = -1, - optional const &name = nullopt); - Tensor dense(Tensor const &input, - int outDim, - optional activation = nullopt, - bool use_bias = true, - DataType data_type = DataType::FLOAT, - optional kernel_initializer = nullopt, - optional bias_initializer = nullopt, - optional const &name = nullopt); + std::optional const &name = std::nullopt); + Tensor + dense(Tensor const &input, + int outDim, + std::optional activation = std::nullopt, + bool use_bias = true, + DataType data_type = DataType::FLOAT, + std::optional const &kernel_initializer = std::nullopt, + std::optional const &bias_initializer = std::nullopt, + std::optional const &name = std::nullopt); // Add a cast layer Tensor cast(Tensor const &input, DataType dtype, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a concat layer Tensor concat(int n, std::vector const &tensors, int axis, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a mean layer Tensor mean(Tensor const &input, std::vector const &dims, @@ -158,51 +172,53 @@ struct ComputationGraphBuilder Tensor *outputs, std::vector const &split, int axis, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Add a flat layer - Tensor flat(Tensor const &input, optional const &name = nullopt); + Tensor flat(Tensor const &input, + std::optional const &name = std::nullopt); // Add a softmax layer Tensor softmax(Tensor const &input, int dim = -1, - optional const &name = nullopt); + std::optional const &name = std::nullopt); // Create input tensors and constants Tensor transpose(Tensor const &input, std::vector const &perm, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor reduce_sum(Tensor const &input, std::vector const &axes, bool keepdims = false, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor reshape(Tensor const &input, std::vector const &shape, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor reverse(Tensor const &input, int axis, - optional const &name = nullopt); + std::optional const &name = std::nullopt); 
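[Editor's illustration, not part of the patch: a minimal standalone sketch of the optional-name-parameter pattern that the ComputationGraphBuilder signatures above adopt after the tl::optional -> std::optional switch. The helper name resolve_name and the layer names are hypothetical, not the FlexFlow API.]

// Sketch only: how a defaulted std::optional<std::string> name parameter
// behaves, mirroring the maybe_name.value_or(get_default_name(...)) idiom
// used in computation_graph_builder.cc.
#include <iostream>
#include <optional>
#include <string>

// Hypothetical helper: pick the caller-supplied name if present,
// otherwise fall back to an auto-generated default.
std::string resolve_name(std::optional<std::string> const &maybe_name,
                         std::string const &default_name) {
  return maybe_name.value_or(default_name);
}

int main() {
  // Caller omits the name: the std::nullopt default kicks in.
  std::cout << resolve_name(std::nullopt, "relu_0") << '\n'; // prints "relu_0"
  // Caller provides an explicit name.
  std::cout << resolve_name("my_relu", "relu_0") << '\n';    // prints "my_relu"
}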
void top_k(Tensor const &input, Tensor *outputs, int k, bool sorted, - optional const &name = nullopt); - Tensor - multihead_attention(Tensor const &query, - Tensor const &key, - Tensor const &value, - int embed_dim, - int num_heads, - int kdim = 0, - int vdim = 0, - float dropout = 0.0f, - bool bias = true, - bool add_bias_kv = false, - bool add_zero_attn = false, - optional initializer = nullopt, - optional const &name = nullopt); + std::optional const &name = std::nullopt); + Tensor multihead_attention( + Tensor const &query, + Tensor const &key, + Tensor const &value, + int embed_dim, + int num_heads, + int kdim = 0, + int vdim = 0, + float dropout = 0.0f, + bool bias = true, + bool add_bias_kv = false, + bool add_zero_attn = false, + std::optional initializer = std::nullopt, + std::optional const &name = std::nullopt); Tensor create_tensor(TensorShape const &, bool create_grad = true); - Parameter create_weight(TensorShape const &, - bool create_grad = true, - optional initializer = nullopt, - optional sync_type = nullopt); + Parameter create_weight( + TensorShape const &, + bool create_grad = true, + std::optional const &initializer = std::nullopt, + std::optional sync_type = std::nullopt); std::vector get_outputs(Layer const &) const; Tensor get_output(Layer const &, int idx) const; @@ -217,18 +233,18 @@ struct ComputationGraphBuilder std::vector const &inputs, std::vector const &weights, std::vector const &outputs); - Tensor - add_layer(Layer const &layer, - std::vector const &inputs, - std::vector>> const - &weight_shapes, - TensorShape const &output_shape); - std::vector - add_layer(Layer const &layer, - std::vector const &inputs, - std::vector>> const - &weight_shapes, - std::vector const &output_shapes); + Tensor add_layer( + Layer const &layer, + std::vector const &inputs, + std::vector>> const + &weight_shapes, + TensorShape const &output_shape); + std::vector add_layer( + Layer const &layer, + std::vector const &inputs, + std::vector>> const + &weight_shapes, + std::vector const &output_shapes); Tensor as_type(Tensor const &, DataType, std::string const &); @@ -237,21 +253,22 @@ struct ComputationGraphBuilder Tensor element_binary(OperatorType, Tensor const &lhs, Tensor const &rhs, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor element_unary(OperatorType, Tensor const &input, - optional const &name = nullopt); - Tensor element_scalar_unary(OperatorType, - Tensor const &input, - float scalar, - optional const &name = nullopt); + std::optional const &name = std::nullopt); + Tensor element_scalar_unary( + OperatorType, + Tensor const &input, + float scalar, + std::optional const &name = std::nullopt); Tensor element_unary(ElementUnaryAttrs const &, Tensor const &input, - optional const &name = nullopt); + std::optional const &name = std::nullopt); Tensor element_scalar_unary(ElementScalarUnaryAttrs const &attrs, Tensor const &x, - optional const &maybe_name); + std::optional const &maybe_name); public: ComputationGraph computation_graph; diff --git a/lib/pcg/include/pcg/device_id.h b/lib/pcg/include/pcg/device_id.h index 3ace2fd84e..50c2558e39 100644 --- a/lib/pcg/include/pcg/device_id.h +++ b/lib/pcg/include/pcg/device_id.h @@ -3,7 +3,6 @@ #include "device_type.h" #include "utils/strong_typedef.h" -#include "utils/variant.h" namespace FlexFlow { @@ -15,7 +14,7 @@ struct cpu_id_t : strong_typedef { using strong_typedef::strong_typedef; }; -using device_id_t = variant; +using device_id_t = std::variant; device_id_t operator+(device_id_t, 
size_t); DeviceType get_device_type(device_id_t); diff --git a/lib/pcg/include/pcg/file_format/v1/data_type.h b/lib/pcg/include/pcg/file_format/v1/data_type.h index dad98e462d..eab188155f 100644 --- a/lib/pcg/include/pcg/file_format/v1/data_type.h +++ b/lib/pcg/include/pcg/file_format/v1/data_type.h @@ -3,11 +3,11 @@ #include "utils/fp16.h" #include "utils/json.h" -#include "utils/variant.h" namespace FlexFlow { -using V1DataTypeValue = variant; +using V1DataTypeValue = + std::variant; enum class V1DataType { BOOL, diff --git a/lib/pcg/include/pcg/file_format/v1/graphs.h b/lib/pcg/include/pcg/file_format/v1/graphs.h index 71a8adb344..6bc852b0f1 100644 --- a/lib/pcg/include/pcg/file_format/v1/graphs.h +++ b/lib/pcg/include/pcg/file_format/v1/graphs.h @@ -53,7 +53,7 @@ struct V1JsonableGraph { struct V1Layer { V1CompGraphOperatorAttrs attrs; - req> name; + req> name; }; FF_VISITABLE_STRUCT(V1Layer, attrs, name); V1Layer to_v1(Layer const &); diff --git a/lib/pcg/include/pcg/file_format/v1/initializer.h b/lib/pcg/include/pcg/file_format/v1/initializer.h index 24f0320bd9..21af7d55e0 100644 --- a/lib/pcg/include/pcg/file_format/v1/initializer.h +++ b/lib/pcg/include/pcg/file_format/v1/initializer.h @@ -19,15 +19,15 @@ struct V1ZeroInitializer {}; FF_VISITABLE_STRUCT(V1ZeroInitializer); struct V1UniformInitializer { - req seed; - req min_val; + int seed; + float min_val; req max_val; }; FF_VISITABLE_STRUCT(V1UniformInitializer, seed, min_val, max_val); struct V1NormInitializer { - req seed; - req mean; + int seed; + float mean; req stddev; }; FF_VISITABLE_STRUCT(V1NormInitializer, seed, mean, stddev); @@ -37,11 +37,11 @@ struct V1ConstantInitializer { }; FF_VISITABLE_STRUCT(V1ConstantInitializer, value); -using V1Initializer = variant; +using V1Initializer = std::variant; } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/file_format/v1/operator_attrs.h b/lib/pcg/include/pcg/file_format/v1/operator_attrs.h index 2ea87cbf56..2830fbd301 100644 --- a/lib/pcg/include/pcg/file_format/v1/operator_attrs.h +++ b/lib/pcg/include/pcg/file_format/v1/operator_attrs.h @@ -2,7 +2,7 @@ #define _FLEXFLOW_PCG_INCLUDE_PCG_FILE_FORMAT_V1_OPERATOR_ATTRS_H #include "utils/json.h" -#include "utils/variant.h" +#include namespace FlexFlow { @@ -12,8 +12,8 @@ FF_VISITABLE_STRUCT(V1Conv2DAttrs); static_assert( std::is_same, std::tuple<>>::value, ""); -using V1CompGraphOperatorAttrs = variant; -using V1PCGOperatorAttrs = variant; +using V1CompGraphOperatorAttrs = std::variant; +using V1PCGOperatorAttrs = std::variant; } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/file_format/v1/parallel_tensor.h b/lib/pcg/include/pcg/file_format/v1/parallel_tensor.h index 1ea4cd04de..c215569b21 100644 --- a/lib/pcg/include/pcg/file_format/v1/parallel_tensor.h +++ b/lib/pcg/include/pcg/file_format/v1/parallel_tensor.h @@ -11,22 +11,22 @@ namespace FlexFlow { struct V1ParallelDim { - req size; - req degree; + size_t size; + int degree; req is_replica_dim; }; FF_VISITABLE_STRUCT(V1ParallelDim, size, degree, is_replica_dim); struct V1ParallelTensorShape { - req> dims; + std::vector dims; req data_type; }; FF_VISITABLE_STRUCT(V1ParallelTensorShape, dims, data_type); struct V1ParallelTensor { V1ParallelTensorShape shape; - req> sync_type; - req> initializer; + std::optional sync_type; + std::optional initializer; req create_grad; }; FF_VISITABLE_STRUCT( diff --git a/lib/pcg/include/pcg/file_format/v1/tensor.h b/lib/pcg/include/pcg/file_format/v1/tensor.h index e1f6828186..c304a41401 100644 --- 
a/lib/pcg/include/pcg/file_format/v1/tensor.h +++ b/lib/pcg/include/pcg/file_format/v1/tensor.h @@ -12,7 +12,7 @@ namespace FlexFlow { struct V1TensorShape { - req> dims; + std::vector dims; req data_type; }; FF_VISITABLE_STRUCT(V1TensorShape, dims, data_type); @@ -21,10 +21,10 @@ V1TensorShape to_v1(TensorShape const &); struct V1Tensor { V1TensorShape shape; - req> initializer; - req create_gradients; - req> sync_type; - req> name; + std::optional initializer; + bool create_gradients; + std::optional sync_type; + req> name; }; FF_VISITABLE_STRUCT( V1Tensor, shape, initializer, create_gradients, sync_type, name); diff --git a/lib/pcg/include/pcg/initializer.h b/lib/pcg/include/pcg/initializer.h index 58e4fcc242..6913289653 100644 --- a/lib/pcg/include/pcg/initializer.h +++ b/lib/pcg/include/pcg/initializer.h @@ -20,15 +20,15 @@ struct ZeroInitializer { FF_VISITABLE_STRUCT(ZeroInitializer); struct UniformInitializer { - req seed; - req min_val; + int seed; + float min_val; req max_val; }; FF_VISITABLE_STRUCT(UniformInitializer, seed, min_val, max_val); struct NormInitializer { - req seed; - req mean; + int seed; + float mean; req stddev; }; FF_VISITABLE_STRUCT(NormInitializer, seed, mean, stddev); @@ -38,11 +38,11 @@ struct ConstantInitializer { }; FF_VISITABLE_STRUCT(ConstantInitializer, value); -using Initializer = variant; +using Initializer = std::variant; CHECK_WELL_BEHAVED_VALUE_TYPE(Initializer); } // namespace FlexFlow diff --git a/lib/pcg/include/pcg/layer.h b/lib/pcg/include/pcg/layer.h index abf1fe6a91..9749cb9d06 100644 --- a/lib/pcg/include/pcg/layer.h +++ b/lib/pcg/include/pcg/layer.h @@ -7,13 +7,14 @@ namespace FlexFlow { -struct Layer : public use_visitable_cmp { +struct Layer { public: Layer() = delete; - Layer(CompGraphOperatorAttrs const &attrs, optional const &name); + Layer(CompGraphOperatorAttrs const &attrs, + std::optional const &name); public: - optional> name; + std::optional> name; CompGraphOperatorAttrs attrs; }; diff --git a/lib/pcg/include/pcg/operator.h b/lib/pcg/include/pcg/operator.h index c7a49bb57e..5804e38f95 100644 --- a/lib/pcg/include/pcg/operator.h +++ b/lib/pcg/include/pcg/operator.h @@ -11,7 +11,8 @@ namespace FlexFlow { struct Operator : public use_visitable_cmp { public: Operator() = delete; - Operator(PCGOperatorAttrs const &attrs, optional const &name); + Operator(PCGOperatorAttrs const &attrs, + std::optional const &name); operator PCGOperatorAttrs() const; diff --git a/lib/pcg/include/pcg/parallel_tensor.h b/lib/pcg/include/pcg/parallel_tensor.h index eadc83d9fd..c3f7ebdfed 100644 --- a/lib/pcg/include/pcg/parallel_tensor.h +++ b/lib/pcg/include/pcg/parallel_tensor.h @@ -39,19 +39,19 @@ struct ParallelTensor : public use_visitable_cmp { ParallelTensor(ParallelTensorShape const &, CreateGrad create_gradients, - optional sync_type = nullopt, - optional initializer = nullopt); + std::optional sync_type = std::nullopt, + std::optional initializer = std::nullopt); ParallelTensor(ParallelTensorDims const &, DataType, CreateGrad create_gradients, - optional sync_type = nullopt, - optional initializer = nullopt); + std::optional sync_type = std::nullopt, + std::optional initializer = std::nullopt); public: ParallelTensorDims dims; DataType data_type; - optional sync_type = nullopt; - optional initializer = nullopt; + std::optional sync_type = std::nullopt; + std::optional initializer = std::nullopt; CreateGrad create_gradients; }; diff --git a/lib/pcg/include/pcg/tensor.h b/lib/pcg/include/pcg/tensor.h index cb79be245a..975a69809d 100644 --- 
a/lib/pcg/include/pcg/tensor.h +++ b/lib/pcg/include/pcg/tensor.h @@ -24,9 +24,9 @@ struct Tensor { public: TensorDims dims; DataType data_type; - req> initializer; - req create_gradients; - req> sync_type; + std::optional initializer; + bool create_gradients; + req> sync_type; }; FF_VISITABLE_STRUCT( Tensor, dims, data_type, initializer, create_gradients, sync_type); diff --git a/lib/pcg/src/computation_graph_builder.cc b/lib/pcg/src/computation_graph_builder.cc index 9f8e930919..c2e008231e 100644 --- a/lib/pcg/src/computation_graph_builder.cc +++ b/lib/pcg/src/computation_graph_builder.cc @@ -15,7 +15,7 @@ void ComputationGraphBuilder::add_layer(Layer const &layer, Tensor ComputationGraphBuilder::add_layer( Layer const &layer, std::vector const &inputs, - std::vector>> const + std::vector>> const &weight_shapes, TensorShape const &output_shape) { NOT_IMPLEMENTED(); @@ -23,7 +23,7 @@ Tensor ComputationGraphBuilder::add_layer( std::vector ComputationGraphBuilder::add_layer( Layer const &layer, std::vector const &inputs, - std::vector>> const + std::vector>> const &weight_shapes, std::vector const &output_shapes) { NOT_IMPLEMENTED(); @@ -34,7 +34,7 @@ Tensor ComputationGraphBuilder::broadcast(Tensor const &, TensorShape const &) { } Tensor ComputationGraphBuilder::cast(Tensor const &input, DataType dtype, - optional const &name){ + std::optional const &name){ NOT_IMPLEMENTED()} Tensor ComputationGraphBuilder::as_type(Tensor const &x, @@ -60,14 +60,14 @@ static std::string get_default_name(ComputationGraphAttrs const &attrs) { } template -static std::string get_default_name(variant const &attrs) { +static std::string get_default_name(std::variant const &attrs) { return get_default_name(widen(attrs)); } Tensor ComputationGraphBuilder::element_unary( ElementUnaryAttrs const &attrs, Tensor const &x, - optional const &maybe_name) { + std::optional const &maybe_name) { std::string name = maybe_name.value_or(get_default_name(attrs)); Tensor input = this->as_type(x, DataType::FLOAT, name + "input_pre_cast"); @@ -81,7 +81,7 @@ Tensor ComputationGraphBuilder::element_unary( Tensor ComputationGraphBuilder::element_scalar_unary( ElementScalarUnaryAttrs const &attrs, Tensor const &x, - optional const &maybe_name) { + std::optional const &maybe_name) { std::string name = maybe_name.value_or(get_default_name(attrs)); Tensor input = this->as_type(x, DataType::FLOAT, name + "input_pre_cast"); @@ -92,10 +92,10 @@ Tensor ComputationGraphBuilder::element_scalar_unary( return this->add_layer(layer, {input}, {}, output_shape); } -Tensor - ComputationGraphBuilder::element_unary(OperatorType op_type, - Tensor const &input, - optional const &name) { +Tensor ComputationGraphBuilder::element_unary( + OperatorType op_type, + Tensor const &input, + std::optional const &name) { ElementUnaryAttrs attrs = {op_type}; return this->element_unary(attrs, input, name); } @@ -104,7 +104,7 @@ Tensor ComputationGraphBuilder::element_scalar_unary( OperatorType op_type, Tensor const &input, float scalar, - optional const &name) { + std::optional const &name) { ElementScalarUnaryAttrs attrs = {op_type, scalar}; return this->element_scalar_unary(attrs, input, name); } @@ -113,7 +113,7 @@ Tensor ComputationGraphBuilder::element_binary( OperatorType op_type, Tensor const &lhs, Tensor const &rhs, - optional const &maybe_name) { + std::optional const &maybe_name) { std::string name = maybe_name.value_or(get_default_name(op_type)); TensorShape compute_shape = this->get_broadcast_target_shape({lhs, rhs}); @@ -135,119 +135,121 @@ Tensor 
ComputationGraphBuilder::element_binary( } Tensor ComputationGraphBuilder::exp(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::EXP, input, name); } Tensor ComputationGraphBuilder::add(Tensor const &lhs, Tensor const &rhs, - optional const &name) { + std::optional const &name) { return this->element_binary(Op::EW_ADD, lhs, rhs, name); } -Tensor ComputationGraphBuilder::subtract(Tensor const &lhs, - Tensor const &rhs, - optional const &name) { +Tensor + ComputationGraphBuilder::subtract(Tensor const &lhs, + Tensor const &rhs, + std::optional const &name) { return this->element_binary(Op::EW_SUB, lhs, rhs, name); } -Tensor ComputationGraphBuilder::multiply(Tensor const &lhs, - Tensor const &rhs, - optional const &name) { +Tensor + ComputationGraphBuilder::multiply(Tensor const &lhs, + Tensor const &rhs, + std::optional const &name) { return this->element_binary(Op::EW_MUL, lhs, rhs, name); } Tensor ComputationGraphBuilder::divide(Tensor const &lhs, Tensor const &rhs, - optional const &name) { + std::optional const &name) { return this->element_binary(Op::EW_DIV, lhs, rhs, name); } Tensor ComputationGraphBuilder::max(Tensor const &lhs, Tensor const &rhs, - optional const &name) { + std::optional const &name) { return this->element_binary(Op::EW_MAX, lhs, rhs, name); } Tensor ComputationGraphBuilder::min(Tensor const &lhs, Tensor const &rhs, - optional const &name) { + std::optional const &name) { return this->element_binary(Op::EW_MIN, lhs, rhs, name); } Tensor ComputationGraphBuilder::rsqrt(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::RSQRT, input, name); } Tensor ComputationGraphBuilder::pow(Tensor const &input, float exponent, - optional const &name) { + std::optional const &name) { return this->element_scalar_unary(Op::POW, input, exponent, name); } Tensor ComputationGraphBuilder::scalar_multiply( - Tensor const &input, float scalar, optional const &name) { + Tensor const &input, float scalar, std::optional const &name) { return this->element_scalar_unary(Op::SCALAR_MULTIPLY, input, scalar, name); } -Tensor ComputationGraphBuilder::scalar_add(Tensor const &input, - float scalar, - optional const &name) { +Tensor ComputationGraphBuilder::scalar_add( + Tensor const &input, float scalar, std::optional const &name) { return this->element_scalar_unary(Op::SCALAR_ADD, input, scalar, name); } -Tensor ComputationGraphBuilder::scalar_sub(Tensor const &lhs, - float rhs, - optional const &name) { +Tensor ComputationGraphBuilder::scalar_sub( + Tensor const &lhs, float rhs, std::optional const &name) { return this->element_scalar_unary(Op::SCALAR_SUB, lhs, rhs, name); } -Tensor - ComputationGraphBuilder::scalar_truediv(Tensor const &numerator, - float denominator, - optional const &name) { +Tensor ComputationGraphBuilder::scalar_truediv( + Tensor const &numerator, + float denominator, + std::optional const &name) { return this->element_scalar_unary( Op::SCALAR_TRUE_DIV, numerator, denominator, name); } Tensor ComputationGraphBuilder::sin(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::SIN, input, name); } Tensor ComputationGraphBuilder::cos(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::COS, input, name); } Tensor ComputationGraphBuilder::relu(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::RELU, 
input, name); } -Tensor ComputationGraphBuilder::identity(Tensor const &input, - optional const &name) { +Tensor + ComputationGraphBuilder::identity(Tensor const &input, + std::optional const &name) { return this->element_unary(Op::IDENTITY, input, name); } Tensor ComputationGraphBuilder::gelu(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::GELU, input, name); } -Tensor ComputationGraphBuilder::sigmoid(Tensor const &input, - optional const &name) { +Tensor + ComputationGraphBuilder::sigmoid(Tensor const &input, + std::optional const &name) { return this->element_unary(Op::SIGMOID, input, name); } Tensor ComputationGraphBuilder::tanh(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::TANH, input, name); } Tensor ComputationGraphBuilder::elu(Tensor const &input, - optional const &name) { + std::optional const &name) { return this->element_unary(Op::ELU, input, name); } @@ -260,13 +262,13 @@ Tensor ComputationGraphBuilder::conv2d( int strideW, int paddingH, int paddingW, - optional const &activation, + std::optional const &activation, int groups, bool use_bias, - optional kernel_initializer, - optional bias_initializer, - optional kernel_regularizer, - optional const &maybe_name) { + std::optional const &kernel_initializer, + std::optional const &bias_initializer, + std::optional const &kernel_regularizer, + std::optional const &maybe_name) { Conv2DAttrs attrs = {outChannels, kernelH, kernelW, @@ -284,7 +286,7 @@ Tensor ComputationGraphBuilder::conv2d( Layer layer = {attrs, name}; TensorShape output_shape = get_output_shape(attrs, input); - std::vector>> weights; + std::vector>> weights; weights.push_back({get_kernel_shape(attrs, input), kernel_initializer}); @@ -295,11 +297,11 @@ Tensor ComputationGraphBuilder::conv2d( return this->add_layer(layer, {input}, weights, output_shape); } -Tensor - ComputationGraphBuilder::dropout(Tensor const &x, - float rate, - unsigned long long seed, - optional const &maybe_name) { +Tensor ComputationGraphBuilder::dropout( + Tensor const &x, + float rate, + unsigned long long seed, + std::optional const &maybe_name) { DropoutAttrs attrs = {rate, seed}; std::string name = maybe_name.value_or(get_default_name(attrs)); @@ -317,8 +319,8 @@ Tensor ComputationGraphBuilder::embedding( int outDim, AggregateOp aggr, DataType dtype, - optional kernel_initializer, - optional const &maybe_name) { + std::optional const &kernel_initializer, + std::optional const &maybe_name) { EmbeddingAttrs attrs = {num_entries, outDim, aggr, dtype}; std::string name = maybe_name.value_or(get_default_name(attrs)); @@ -332,11 +334,11 @@ Tensor ComputationGraphBuilder::embedding( layer, {input}, {{weights_shape, kernel_initializer}}, output_shape); } -std::vector - ComputationGraphBuilder::gather(Tensor const &input, - Tensor const &index, - ff_dim_t dim, - optional const &maybe_name) { +std::vector ComputationGraphBuilder::gather( + Tensor const &input, + Tensor const &index, + ff_dim_t dim, + std::optional const &maybe_name) { GatherAttrs attrs = {dim}; std::string name = maybe_name.value_or(get_default_name(attrs)); @@ -370,7 +372,7 @@ std::vector get_shape(std::vector const &) { // std::vector const &exp_preds, // int n, // float lambda_bal, -// optional const &maybe_name) { +// std::optional const &maybe_name) { // AggregateAttrs attrs = {n, lambda_bal}; // std::string name = maybe_name.value_or(get_default_name(attrs)); @@ -389,7 +391,9 @@ std::vector 
get_shape(std::vector const &) { // } Tensor ComputationGraphBuilder::batch_norm( - Tensor const &input, bool relu, optional const &maybe_name) { + Tensor const &input, + bool relu, + std::optional const &maybe_name) { BatchNormAttrs attrs = BatchNormAttrs{relu}; std::string name = maybe_name.value_or(get_default_name(attrs)); diff --git a/lib/pcg/src/device_id.cc b/lib/pcg/src/device_id.cc index 64be75667a..2849df7c3c 100644 --- a/lib/pcg/src/device_id.cc +++ b/lib/pcg/src/device_id.cc @@ -1,13 +1,14 @@ #include "pcg/device_id.h" +#include "utils/exception.h" #include namespace FlexFlow { DeviceType get_device_type(device_id_t const &id) { - if (holds_alternative(id)) { + if (std::holds_alternative(id)) { return DeviceType::GPU; } else { - assert(holds_alternative(id)); + assert(std::holds_alternative(id)); return DeviceType::CPU; } } diff --git a/lib/pcg/src/layer.cc b/lib/pcg/src/layer.cc index 27d5b31003..00fb07a8c5 100644 --- a/lib/pcg/src/layer.cc +++ b/lib/pcg/src/layer.cc @@ -3,7 +3,7 @@ namespace FlexFlow { Layer::Layer(CompGraphOperatorAttrs const &_attrs, - optional const &_name) + std::optional const &_name) : attrs(_attrs), name(_name) {} } // namespace FlexFlow diff --git a/lib/pcg/src/operator.cc b/lib/pcg/src/operator.cc index 8c79c67464..92ece9a2bf 100644 --- a/lib/pcg/src/operator.cc +++ b/lib/pcg/src/operator.cc @@ -3,7 +3,7 @@ namespace FlexFlow { Operator::Operator(PCGOperatorAttrs const &attrs, - optional const &name) + std::optional const &name) : attrs(attrs) {} Operator::operator PCGOperatorAttrs() const { diff --git a/lib/pcg/src/parallel_tensor.cc b/lib/pcg/src/parallel_tensor.cc index a8d7b15ea9..19dc1e96d3 100644 --- a/lib/pcg/src/parallel_tensor.cc +++ b/lib/pcg/src/parallel_tensor.cc @@ -5,8 +5,8 @@ namespace FlexFlow { ParallelTensor::ParallelTensor(ParallelTensorDims const &dims, DataType data_type, CreateGrad create_gradients, - optional sync_type, - optional initializer) + std::optional sync_type, + std::optional initializer) : dims(dims), data_type(data_type), sync_type(sync_type), initializer(initializer), create_gradients(create_gradients) {} diff --git a/lib/runtime/src/serialization.h b/lib/runtime/src/serialization.h index 53edb09075..65601990b0 100644 --- a/lib/runtime/src/serialization.h +++ b/lib/runtime/src/serialization.h @@ -106,11 +106,12 @@ struct is_trivially_serializable> : is_trivially_serializable {}; template -struct is_trivially_serializable> - : elements_satisfy> {}; +struct is_trivially_serializable> + : elements_satisfy> {}; template -struct is_trivially_serializable> : is_trivially_serializable {}; +struct is_trivially_serializable> + : is_trivially_serializable {}; template struct std_array_size_helper; @@ -146,7 +147,8 @@ static_assert(is_trivially_serializable::value, ""); static_assert(is_trivially_serializable::value, ""); static_assert(is_trivially_serializable::value, ""); static_assert(is_trivially_serializable::value, ""); -static_assert(is_trivially_serializable>::value, ""); +static_assert(is_trivially_serializable>::value, + ""); static_assert(std::is_same, std::tuple>::value, ""); diff --git a/lib/substitutions/src/substitution.cc b/lib/substitutions/src/substitution.cc index 72c9248e6c..dd28a9aa5d 100644 --- a/lib/substitutions/src/substitution.cc +++ b/lib/substitutions/src/substitution.cc @@ -226,7 +226,7 @@ Operator get_operator_attrs(SubParallelComputationGraph const &graph, get(assignments.at(OperatorAttributeKey::USE_BIAS)), get(assignments.at(OperatorAttributeKey::DATA_TYPE)), 
get(assignments.at(OperatorAttributeKey::ACTIVATION)), - get>( + get>( assignments.at(OperatorAttributeKey::REGULARIZER))}, nullopt); case Op::MULTIHEAD_ATTENTION: diff --git a/lib/utils/CMakeLists.txt b/lib/utils/CMakeLists.txt index ac23248db6..a0d77b9f76 100644 --- a/lib/utils/CMakeLists.txt +++ b/lib/utils/CMakeLists.txt @@ -8,14 +8,10 @@ ff_add_library( PRIVATE_INCLUDE src/ DEPS - optional expected - variant visit_struct fmt - invoke json - any cuda ) diff --git a/lib/utils/include/utils/containers.decl.h b/lib/utils/include/utils/containers.decl.h index 8ad65a4488..84fd4a5acc 100644 --- a/lib/utils/include/utils/containers.decl.h +++ b/lib/utils/include/utils/containers.decl.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_UTILS_INCLUDE_UTILS_CONTAINERS_DECL_H #include "utils/bidict.h" -#include "utils/invoke.h" #include "utils/optional.decl" #include "utils/required_core.h" #include "utils/type_traits_core.h" @@ -119,14 +118,14 @@ template std::unordered_set without_order(C const &c); template -optional index_of(Container const &c, Element const &e); +std::optional index_of(Container const &c, Element const &e); template std::unordered_set intersection(std::unordered_set const &l, std::unordered_set const &r); template -optional intersection(C const &c); +std::optional intersection(C const &c); template bool are_disjoint(std::unordered_set const &l, @@ -146,13 +145,13 @@ bidict merge_maps(bidict const &lhs, bidict const &rhs); template , - typename V = invoke_result_t> + typename V = std::invoke_result_t> std::unordered_map generate_map(C const &c, F const &f); template , - typename V = invoke_result_t> + typename V = std::invoke_result_t> bidict generate_bidict(C const &c, F const &f); template @@ -189,7 +188,7 @@ std::unordered_set std::unordered_set const &input); template -optional maybe_get_only(C const &c); +std::optional maybe_get_only(C const &c); template typename C::value_type get_only(C const &c); @@ -204,7 +203,7 @@ template void extend(std::unordered_set &lhs, C const &rhs); template -void extend(C &lhs, optional const &e); +void extend(C &lhs, std::optional const &e); template bool all_of(C const &c, F const &f); @@ -240,7 +239,7 @@ std::unordered_set transform(std::unordered_set const &v, F const &f); template std::string transform(std::string const &s, F const &f); -template > +template > std::vector repeat(int n, F const &f); template @@ -256,7 +255,7 @@ std::vector flatmap(std::vector const &v, F const &f); template >> + typename Out = get_element_type_t>> std::unordered_set flatmap(std::unordered_set const &v, F const &f); template @@ -292,12 +291,12 @@ template T reversed(T const &t); template -std::vector value_all(std::vector> const &v); +std::vector value_all(std::vector> const &v); template std::vector subvec(std::vector const &v, - optional const &maybe_start, - optional const &maybe_end); + std::optional const &maybe_start, + std::optional const &maybe_end); template struct reversed_container_t; diff --git a/lib/utils/include/utils/containers.h b/lib/utils/include/utils/containers.h index 679586ba69..cdf4591cdb 100644 --- a/lib/utils/include/utils/containers.h +++ b/lib/utils/include/utils/containers.h @@ -3,11 +3,9 @@ #include "bidict.h" #include "containers.decl.h" -#include "invoke.h" #include "required_core.h" #include "type_traits_core.h" #include "utils/exception.h" -#include "utils/optional.h" #include "utils/type_traits.h" #include #include @@ -244,10 +242,10 @@ std::unordered_set without_order(C const &c) { } template -tl::optional index_of(Container const &c, 
Element const &e) { +std::optional index_of(Container const &c, Element const &e) { auto it = std::find(c.cbegin(), c.cend(), e); if (it == c.cend()) { - return tl::nullopt; + return std::nullopt; } else { return std::distance(c.cbegin(), it); } @@ -266,8 +264,8 @@ std::unordered_set intersection(std::unordered_set const &l, } template -optional intersection(C const &c) { - optional result; +std::optional intersection(C const &c) { + std::optional result; for (T const &t : c) { result = intersection(result.value_or(t), t); } @@ -420,11 +418,11 @@ std::unordered_set } template -optional maybe_get_only(C const &c) { +std::optional maybe_get_only(C const &c) { if (c.size() == 1) { return *c.cbegin(); } else { - return nullopt; + return std::nullopt; } } @@ -454,7 +452,7 @@ void extend(std::unordered_set &lhs, C const &rhs) { } template -void extend(C &lhs, optional const &e) { +void extend(C &lhs, std::optional const &e) { if (e.has_value()) { return extend(lhs, e.value()); } @@ -570,7 +568,7 @@ struct get_element_type { }; template -struct get_element_type> { +struct get_element_type> { using type = T; }; @@ -666,8 +664,8 @@ T reversed(T const &t) { } template -std::vector value_all(std::vector> const &v) { - return transform(v, [](optional const &element) { +std::vector value_all(std::vector> const &v) { + return transform(v, [](std::optional const &element) { return unwrap(element, [] { throw mk_runtime_error( "Encountered element without value in call to value_all"); @@ -677,8 +675,8 @@ std::vector value_all(std::vector> const &v) { template std::vector subvec(std::vector const &v, - optional const &maybe_start, - optional const &maybe_end) { + std::optional const &maybe_start, + std::optional const &maybe_end) { auto begin_iter = v.cbegin(); auto end_iter = v.cend(); diff --git a/lib/utils/include/utils/disjoint_set.h b/lib/utils/include/utils/disjoint_set.h index e0a3aaa5ee..4810e5b29e 100644 --- a/lib/utils/include/utils/disjoint_set.h +++ b/lib/utils/include/utils/disjoint_set.h @@ -12,19 +12,19 @@ namespace FlexFlow { template class m_disjoint_set { public: - void m_union(optional const &l, optional const &r) { + void m_union(std::optional const &l, std::optional const &r) { this->add_node_if_missing(l); this->add_node_if_missing(r); - optional const ll = this->find(l); - optional const rr = this->find(r); + std::optional const ll = this->find(l); + std::optional const rr = this->find(r); if (ll != rr) { this->mapping[ll] = rr; } } - optional const find(optional const &t) const { + std::optional const find(std::optional const &t) const { this->add_node_if_missing(t); - optional const parent = this->mapping.at(t); + std::optional const parent = this->mapping.at(t); if (!parent.has_value()) { return t; } else { @@ -33,18 +33,19 @@ class m_disjoint_set { } private: - void add_node_if_missing(optional const &t) const { + void add_node_if_missing(std::optional const &t) const { if (mapping.find(t) == mapping.end()) { - mapping[t] = nullopt; + mapping[t] = std::nullopt; } } - mutable std::unordered_map, optional> mapping; + mutable std::unordered_map, std::optional> mapping; }; // Custom comparator for optional template struct OptionalComparator { - bool operator()(optional const &lhs, optional const &rhs) const { + bool operator()(std::optional const &lhs, + std::optional const &rhs) const { if (!lhs.has_value() || !rhs.has_value()) { return false; } @@ -55,34 +56,34 @@ struct OptionalComparator { template > class disjoint_set { public: - void m_union(optional const &l, optional const &r) 
const { + void m_union(std::optional const &l, std::optional const &r) const { this->nodes.insert(l); this->nodes.insert(r); this->ds.m_union(this->get_node(l), this->get_node(r)); } - optional const find(optional const &t) const { + std::optional const find(std::optional const &t) const { this->nodes.insert(t); // Make sure the node is in the set return this->ds.find(this->get_node(t)); } - std::map, optional, Compare> get_mapping() const { - std::map, optional, Compare> mapping; - for (optional const &t : this->nodes) { + std::map, std::optional, Compare> get_mapping() const { + std::map, std::optional, Compare> mapping; + for (std::optional const &t : this->nodes) { mapping[t] = this->ds.find(t); } return mapping; } private: - optional const get_node(optional const &t) const { + std::optional const get_node(std::optional const &t) const { auto it = this->nodes.find(t); assert(it != this->nodes.end()); return *it; } mutable m_disjoint_set ds; - mutable std::set, Compare> + mutable std::set, Compare> nodes; // Note(lambda): make mutable to allow using ds->find() to be const // because while the result is invariant to path compression, etc. }; diff --git a/lib/utils/include/utils/dot_file.h b/lib/utils/include/utils/dot_file.h index 9529c659e2..6cdc78f6d4 100644 --- a/lib/utils/include/utils/dot_file.h +++ b/lib/utils/include/utils/dot_file.h @@ -2,7 +2,6 @@ #define _DOT_FILE_H #include "record_formatter.h" -#include "tl/optional.hpp" #include #include #include @@ -20,9 +19,9 @@ class DotFile { std::map node_ids; std::unordered_map> subgraphs; std::unordered_map> subgraph_children; - std::unordered_map> subgraph_parents; - tl::optional owned_fstream = tl::nullopt; - tl::optional out = tl::nullopt; + std::unordered_map> subgraph_parents; + std::optional owned_fstream = std::nullopt; + std::ostream *out = nullptr; std::string get_node_name(size_t node_id) const { std::ostringstream s; s << "node" << node_id; @@ -52,7 +51,7 @@ class DotFile { DotFile(std::string const &filename) : owned_fstream(filename) { this->start_output(); } - DotFile(std::ostream &s) : node_id(0), out(s) { + DotFile(std::ostream &s) : node_id(0), out(&s) { this->start_output(); } @@ -113,7 +112,7 @@ class DotFile { this->get_ostream().flush(); } - size_t add_subgraph(tl::optional parent_id = tl::nullopt) { + size_t add_subgraph(std::optional parent_id = std::nullopt) { size_t subgraph = this->subgraph_id; subgraph_id++; this->subgraph_children[subgraph]; @@ -134,7 +133,7 @@ class DotFile { throw std::runtime_error(oss.str()); } this->subgraphs[subgraph].insert(this->node_ids.at(node)); - tl::optional parent = this->subgraph_parents.at(subgraph); + std::optional parent = this->subgraph_parents.at(subgraph); if (parent.has_value()) { this->add_node_to_subgraph(node, parent.value()); } diff --git a/lib/utils/include/utils/fmt.h b/lib/utils/include/utils/fmt.h index ddf5b00355..58982d6f36 100644 --- a/lib/utils/include/utils/fmt.h +++ b/lib/utils/include/utils/fmt.h @@ -40,15 +40,15 @@ operator<<(std::ostream &s, T const &t) { #__VA_ARGS__ " must be fmtable"); // This will not -template -typename std::enable_if::value, - std::ostream &>::type - operator<<(std::ostream &s, T const &t) { - // CHECK_FMTABLE(T); - - std::string result = fmt::to_string(t); - return s << result; -} +/* template */ +/* typename std::enable_if::value, */ +/* std::ostream &>::type */ +/* operator<<(std::ostream &s, T const &t) { */ +/* // CHECK_FMTABLE(T); */ + +/* std::string result = fmt::to_string(t); */ +/* return s << result; */ +/* } */ // 
template // typename std::enable_if::value, std::ostream &>::type diff --git a/lib/utils/include/utils/graph/algorithms.h b/lib/utils/include/utils/graph/algorithms.h index 4b08fd5e4a..bb70a9093c 100644 --- a/lib/utils/include/utils/graph/algorithms.h +++ b/lib/utils/include/utils/graph/algorithms.h @@ -174,12 +174,12 @@ struct GetDstNodeFunctor { }; template -Node get_src_node(variant const &t) { +Node get_src_node(std::variant const &t) { return visit(GetSrcNodeFunctor{}, t); } template -Node get_dst_node(variant const &t) { +Node get_dst_node(std::variant const &t) { return visit(GetDstNodeFunctor{}, t); } @@ -203,12 +203,12 @@ struct GetDstIdxFunctor { }; template -NodePort get_src_idx(variant const &t) { +NodePort get_src_idx(std::variant const &t) { return visit(GetSrcIdxFunctor{}, t); } template -NodePort get_dst_idx(variant const &t) { +NodePort get_dst_idx(std::variant const &t) { return visit(GetDstIdxFunctor{}, t); } @@ -229,8 +229,8 @@ std::unordered_set get_open_sources(OpenMultiDiGraphView const &g); std::unordered_set get_open_sinks(OpenMultiDiGraphView const &g); bool is_acyclic(MultiDiGraphView const &, std::unordered_set const &); -tl::optional is_acyclic(DiGraphView const &); -tl::optional is_acyclic(MultiDiGraphView const &); +std::optional is_acyclic(DiGraphView const &); +std::optional is_acyclic(MultiDiGraphView const &); std::unordered_map> get_dominators(DiGraphView const &); @@ -240,15 +240,15 @@ std::unordered_set get_dominators(DiGraphView const &, std::unordered_map> get_post_dominators(DiGraphView const &); -std::unordered_map> +std::unordered_map> get_imm_dominators(DiGraphView const &); -std::unordered_map> +std::unordered_map> get_imm_post_dominators(DiGraphView const &); -tl::optional get_imm_post_dominator(DiGraphView const &, Node const &); -tl::optional get_imm_post_dominator(MultiDiGraphView const &, - Node const &); -tl::optional get_imm_post_dominator(DiGraphView const &, - std::unordered_set const &); +std::optional get_imm_post_dominator(DiGraphView const &, Node const &); +std::optional get_imm_post_dominator(MultiDiGraphView const &, + Node const &); +std::optional get_imm_post_dominator(DiGraphView const &, + std::unordered_set const &); std::vector get_dfs_ordering(DiGraphView const &, @@ -328,8 +328,8 @@ void export_as_dot( DotFile &, DiGraphView const &, std::function const &, - tl::optional const &> = - tl::nullopt); + std::optional> = + std::nullopt); } // namespace FlexFlow diff --git a/lib/utils/include/utils/graph/labelled/output_labelled_open.h b/lib/utils/include/utils/graph/labelled/output_labelled_open.h index cb41a7158a..eb406d1804 100644 --- a/lib/utils/include/utils/graph/labelled/output_labelled_open.h +++ b/lib/utils/include/utils/graph/labelled/output_labelled_open.h @@ -129,12 +129,12 @@ struct OutputLabelledOpenMultiDiGraph } template - EdgeLabel const &at(variant const &e) const { + EdgeLabel const &at(std::variant const &e) const { return visit([&](auto const &e) -> auto const & { return this->at(e); }, e); } template - EdgeLabel &at(variant const &e) { + EdgeLabel &at(std::variant const &e) { return visit([&](auto const &e) -> auto & { return this->at(e); }, e); } diff --git a/lib/utils/include/utils/graph/open_edge.h b/lib/utils/include/utils/graph/open_edge.h index e83c58196b..37e98a419d 100644 --- a/lib/utils/include/utils/graph/open_edge.h +++ b/lib/utils/include/utils/graph/open_edge.h @@ -6,11 +6,11 @@ namespace FlexFlow { using OpenMultiDiEdge = - variant; + std::variant; -using DownwardOpenMultiDiEdge = variant; 
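[Editor's illustration, not part of the patch: the open_edge.h and variant.h hunks replace the mpark/FlexFlow variant aliases with std::variant, and the is<...>() helper with std::holds_alternative. The sketch below shows the underlying pattern under assumed stand-in edge types; holds_any_of is a hypothetical name, not the FlexFlow function.]

// C++17 sketch: check whether a std::variant currently holds any of a set of
// requested alternatives, the idea behind utils/variant.h's is<...>() helper.
#include <cassert>
#include <variant>

struct InputEdge {};
struct OutputEdge {};
struct InternalEdge {};

// Stand-in for the OpenMultiDiEdge-style variants in open_edge.h.
using OpenEdge = std::variant<InputEdge, OutputEdge, InternalEdge>;

// Needles... is supplied explicitly; Ts... is deduced from the argument.
template <typename... Needles, typename... Ts>
bool holds_any_of(std::variant<Ts...> const &v) {
  // Fold over ||: true if the variant holds any one of the Needles.
  return (std::holds_alternative<Needles>(v) || ...);
}

int main() {
  OpenEdge e = InputEdge{};
  assert((holds_any_of<InputEdge, OutputEdge>(e))); // holds InputEdge -> true
  assert((!holds_any_of<InternalEdge>(e)));         // not held -> false
}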
+using DownwardOpenMultiDiEdge = std::variant; -using UpwardOpenMultiDiEdge = variant; +using UpwardOpenMultiDiEdge = std::variant; bool is_input_edge(OpenMultiDiEdge const &); bool is_output_edge(OpenMultiDiEdge const &); diff --git a/lib/utils/include/utils/graph/query_set.h b/lib/utils/include/utils/graph/query_set.h index 8192949cb0..c835afa6a6 100644 --- a/lib/utils/include/utils/graph/query_set.h +++ b/lib/utils/include/utils/graph/query_set.h @@ -4,7 +4,7 @@ #include "utils/bidict.h" #include "utils/containers.decl.h" #include "utils/exception.h" -#include "utils/optional.h" +#include #include namespace FlexFlow { @@ -16,7 +16,7 @@ struct query_set { query_set(std::unordered_set const &query) : query(query) {} - query_set(optional> const &query) : query(query) {} + query_set(std::optional> const &query) : query(query) {} query_set(std::initializer_list const &l) : query_set(std::unordered_set{l}) {} @@ -43,11 +43,11 @@ struct query_set { } static query_set matchall() { - return {nullopt}; + return {std::nullopt}; } private: - optional> query; + std::optional> query; }; template diff --git a/lib/utils/include/utils/graph/serialparallel.h b/lib/utils/include/utils/graph/serialparallel.h index b58281de7d..47bcb4031e 100644 --- a/lib/utils/include/utils/graph/serialparallel.h +++ b/lib/utils/include/utils/graph/serialparallel.h @@ -4,7 +4,7 @@ #include "digraph.h" #include "multidigraph.h" #include "utils/optional.h" -#include "utils/variant.h" +#include #include namespace FlexFlow { @@ -12,22 +12,22 @@ namespace FlexFlow { Node find_source_node(DiGraphView const &); Node find_sink_node(DiGraphView const &); -optional find_bottleneck_node(DiGraphView const &); +std::optional find_bottleneck_node(DiGraphView const &); struct Parallel; struct Serial { - std::vector> children; + std::vector> children; }; struct Parallel { - std::vector> children; + std::vector> children; }; FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(Parallel, children); FF_VISITABLE_STRUCT_NONSTANDARD_CONSTRUCTION(Serial, children); -using SerialParallelDecomposition = variant; +using SerialParallelDecomposition = std::variant; SerialParallelDecomposition get_serial_parallel_decomposition(DiGraphView const &); diff --git a/lib/utils/include/utils/graph/traversal.h b/lib/utils/include/utils/graph/traversal.h index a4101de64d..3c3992cd53 100644 --- a/lib/utils/include/utils/graph/traversal.h +++ b/lib/utils/include/utils/graph/traversal.h @@ -76,7 +76,7 @@ struct bfs_iterator { bfs_iterator(DiGraphView const &, std::queue const &, - optional> const &); + std::optional> const &); bfs_iterator(DiGraphView const &, std::unordered_set const &starting_points); @@ -91,7 +91,7 @@ struct bfs_iterator { private: DiGraphView graph; std::queue q; - optional> seen; + std::optional> seen; }; struct CheckedDFSView { diff --git a/lib/utils/include/utils/graph/views.h b/lib/utils/include/utils/graph/views.h index 776a72e6d5..e891a948f0 100644 --- a/lib/utils/include/utils/graph/views.h +++ b/lib/utils/include/utils/graph/views.h @@ -6,7 +6,6 @@ #include "labelled_graphs.h" #include "multidigraph.h" #include "open_graphs.h" -#include "tl/optional.hpp" #include "undirected.h" #include "utils/bidict.h" #include "utils/graph/digraph_interfaces.h" @@ -217,8 +216,8 @@ struct SingleSourceNodeView : public IDiGraphView { private: DiGraphView g; - optional singleton_src; - optional joined_view; + std::optional singleton_src; + std::optional joined_view; std::unique_ptr added_edges_view; }; diff --git a/lib/utils/include/utils/invoke.h 
b/lib/utils/include/utils/invoke.h deleted file mode 100644 index cee1eaee0e..0000000000 --- a/lib/utils/include/utils/invoke.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _FLEXFLOW_UTILS_INCLUDE_UTILS_INVOKE_H -#define _FLEXFLOW_UTILS_INCLUDE_UTILS_INVOKE_H - -#include "invoke.hpp/invoke.hpp" - -namespace FlexFlow { - -using namespace ::invoke_hpp; - -} - -#endif diff --git a/lib/utils/include/utils/json.h b/lib/utils/include/utils/json.h index a753c52daa..010943a9f9 100644 --- a/lib/utils/include/utils/json.h +++ b/lib/utils/include/utils/json.h @@ -149,7 +149,7 @@ struct VariantToJsonFunctor { }; template -void variant_to_json(json &j, variant const &v) { +void variant_to_json(json &j, std::variant const &v) { visit(::FlexFlow::VariantToJsonFunctor{j}, v.value); } @@ -160,8 +160,9 @@ struct VariantFromJsonFunctor { json const &j; template - optional operator()(std::integral_constant const &) const { - using Type = typename variant_alternative::type; + std::optional + operator()(std::integral_constant const &) const { + using Type = typename std::variant_alternative::type; if (visit_struct::get_name()) { return j.at("value").get(); @@ -170,8 +171,8 @@ struct VariantFromJsonFunctor { }; template -variant variant_from_json(json const &j) { - ::FlexFlow::VariantFromJsonFunctor<::FlexFlow::variant> func(j); +std::variant variant_from_json(json const &j) { + ::FlexFlow::VariantFromJsonFunctor> func(j); auto result = seq_map(func, seq_enumerate_args_t{}); if (!result.has_value()) { throw ::FlexFlow::mk_runtime_error("Invalid type {} found in json", @@ -219,9 +220,9 @@ struct adl_serializer< template struct adl_serializer< - ::FlexFlow::optional, + std::optional, typename std::enable_if<::FlexFlow::is_jsonable::value>::type> { - static void to_json(json &j, ::FlexFlow::optional const &t) { + static void to_json(json &j, std::optional const &t) { if (t.has_value()) { to_json(j, t.value()); } else { @@ -229,9 +230,9 @@ struct adl_serializer< } } - static void from_json(json const &j, ::FlexFlow::optional &t) { + static void from_json(json const &j, std::optional &t) { if (j == nullptr) { - t = ::FlexFlow::nullopt; + t = std::nullopt; } else { t = j.get(); } @@ -239,15 +240,15 @@ struct adl_serializer< }; template -struct adl_serializer<::FlexFlow::variant, +struct adl_serializer, typename std::enable_if<::FlexFlow::elements_satisfy< ::FlexFlow::is_json_serializable, - ::FlexFlow::variant>::value>::type> { - static void to_json(json &j, ::FlexFlow::variant const &v) { + std::variant>::value>::type> { + static void to_json(json &j, std::variant const &v) { return ::FlexFlow::variant_to_json(j, v); } - static ::FlexFlow::variant from_json(json const &j) { + static std::variant from_json(json const &j) { return ::FlexFlow::variant_from_json(j); } }; diff --git a/lib/utils/include/utils/optional.decl b/lib/utils/include/utils/optional.decl index 370026fcc0..82f4bd984d 100644 --- a/lib/utils/include/utils/optional.decl +++ b/lib/utils/include/utils/optional.decl @@ -1,17 +1,15 @@ #ifndef _FLEXFLOW_UTILS_OPTIONAL_H #define _FLEXFLOW_UTILS_OPTIONAL_H -#include "tl/optional.hpp" +#include namespace FlexFlow { -using namespace tl; - template -T const &unwrap(optional const &o, F const &f); +T const &unwrap(std::optional const &o, F const &f); template -T const &assert_unwrap(optional const &o); +T const &assert_unwrap(std::optional const &o); } // namespace FlexFlow diff --git a/lib/utils/include/utils/optional.h b/lib/utils/include/utils/optional.h index 43d2ef4104..71b6d9d975 100644 --- 
a/lib/utils/include/utils/optional.h +++ b/lib/utils/include/utils/optional.h @@ -8,7 +8,7 @@ namespace FlexFlow { template -T const &unwrap(optional const &o, F const &f) { +T const &unwrap(std::optional const &o, F const &f) { if (o.has_value()) { return o.value(); } else { @@ -18,7 +18,7 @@ T const &unwrap(optional const &o, F const &f) { } template -T const &assert_unwrap(optional const &o) { +T const &assert_unwrap(std::optional const &o) { assert(o.has_value()); return o.value(); } @@ -28,9 +28,9 @@ T const &assert_unwrap(optional const &o) { namespace fmt { template -struct formatter<::FlexFlow::optional> : formatter { +struct formatter<::std::optional> : formatter { template - auto format(::FlexFlow::optional const &q, FormatContext &ctx) + auto format(::std::optional const &q, FormatContext &ctx) -> decltype(ctx.out()) { std::string result; if (q.has_value()) { diff --git a/lib/utils/include/utils/sequence.h b/lib/utils/include/utils/sequence.h index 67c2e72ac1..6c66949fd8 100644 --- a/lib/utils/include/utils/sequence.h +++ b/lib/utils/include/utils/sequence.h @@ -1,9 +1,9 @@ #ifndef _FLEXFLOW_UTILS_INCLUDE_UTILS_SEQUENCE_H #define _FLEXFLOW_UTILS_INCLUDE_UTILS_SEQUENCE_H -#include "optional.h" #include "utils/tuple.h" #include "utils/visitable_core.h" +#include #include namespace FlexFlow { @@ -119,7 +119,7 @@ auto seq_select(F const &f, int i, seq const &s) template auto seq_select(F const &f, int i, seq<> const &) -> decltype(f(std::declval>())) { - return nullopt; + return std::nullopt; } template diff --git a/lib/utils/include/utils/stack_map.h b/lib/utils/include/utils/stack_map.h index f2cdf0d88b..76e6e951df 100644 --- a/lib/utils/include/utils/stack_map.h +++ b/lib/utils/include/utils/stack_map.h @@ -2,7 +2,6 @@ #define _FLEXFLOW_UTILS_STACK_MAP_H #include "containers.h" -#include "optional.h" #include "stack_vector.h" namespace std { @@ -22,7 +21,7 @@ struct stack_map { stack_map() = default; V &operator[](K const &k) { - optional idx = get_idx(k); + std::optional idx = get_idx(k); if (!idx.has_value()) { this->contents.push_back({k, {}}); idx = this->contents.size() - 1; @@ -35,7 +34,7 @@ struct stack_map { } void insert(K const &k, V const &v) { - optional idx = get_idx(k); + std::optional idx = get_idx(k); if (!idx.has_value()) { this->contents.push_back({k, v}); } else { @@ -116,14 +115,14 @@ struct stack_map { return sorted_by(this->contents, comparator); } - optional get_idx(K const &k) const { + std::optional get_idx(K const &k) const { for (std::size_t idx = 0; idx < contents.size(); idx++) { if (contents.at(idx).first == k) { return idx; } } - return nullopt; + return std::nullopt; } stack_vector, MAXSIZE> contents; diff --git a/lib/utils/include/utils/stack_vector.h b/lib/utils/include/utils/stack_vector.h index 3d5a433725..fe665ed749 100644 --- a/lib/utils/include/utils/stack_vector.h +++ b/lib/utils/include/utils/stack_vector.h @@ -3,12 +3,12 @@ #include "containers.h" #include "hash-utils.h" -#include "optional.h" #include "utils/fmt.h" #include "utils/test_types.h" #include "utils/type_traits.h" #include #include +#include #include namespace FlexFlow { @@ -16,19 +16,20 @@ namespace FlexFlow { template struct stack_vector { private: - using element_type = - conditional_t::value, T, optional>; + using element_type = conditional_t::value, + T, + std::optional>; static T const &get_value(T const &t) { return t; } - static T const &get_value(optional const &t) { + static T const &get_value(std::optional const &t) { return t.value(); } static T &get_value(T &t) { 
return t; } - static T &get_value(optional &t) { + static T &get_value(std::optional &t) { return t.value(); } diff --git a/lib/utils/include/utils/tuple.h b/lib/utils/include/utils/tuple.h index 202e62b5ad..71c369df6a 100644 --- a/lib/utils/include/utils/tuple.h +++ b/lib/utils/include/utils/tuple.h @@ -1,8 +1,9 @@ #ifndef _FLEXFLOW_UTILS_TUPLE_H #define _FLEXFLOW_UTILS_TUPLE_H -#include "utils/any.h" #include "utils/exception.decl.h" +#include "utils/type_traits_core.h" +#include #include #include #include @@ -48,11 +49,11 @@ void visit_tuple(Visitor &v, std::tuple const &tup) { struct tuple_get_visitor { tuple_get_visitor() = delete; - tuple_get_visitor(int requested_idx, any &result) + tuple_get_visitor(int requested_idx, std::any &result) : requested_idx(requested_idx), result(result) {} int requested_idx; - any &result; + std::any &result; template void operator()(int idx, T const &t) { @@ -63,13 +64,13 @@ struct tuple_get_visitor { }; template -any get(std::tuple const &t, int idx) { +std::any get(std::tuple const &t, int idx) { size_t tuple_size = std::tuple_size::value; if (idx < 0 || idx >= tuple_size) { throw mk_runtime_error( "Error: idx {} out of bounds for tuple of size {}", idx, tuple_size); } - any result; + std::any result; visit_tuple(t, tuple_get_visitor{idx, result}); return result; } diff --git a/lib/utils/include/utils/type_traits.h b/lib/utils/include/utils/type_traits.h index ee45e8dc2e..0c0408723d 100644 --- a/lib/utils/include/utils/type_traits.h +++ b/lib/utils/include/utils/type_traits.h @@ -1,7 +1,6 @@ #ifndef _FLEXFLOW_UTILS_INCLUDE_TYPE_TRAITS_H #define _FLEXFLOW_UTILS_INCLUDE_TYPE_TRAITS_H -#include "utils/invoke.h" #include "utils/metafunction.h" #include "utils/type_traits_core.h" #include "utils/visitable_core.h" diff --git a/lib/utils/include/utils/variant.h b/lib/utils/include/utils/variant.h index b1a1dc1081..b3ae3de115 100644 --- a/lib/utils/include/utils/variant.h +++ b/lib/utils/include/utils/variant.h @@ -1,28 +1,12 @@ #ifndef _FLEXFLOW_UTILS_VARIANT_H #define _FLEXFLOW_UTILS_VARIANT_H -#include "mpark/variant.hpp" -#include "utils/optional.h" #include "utils/type_traits.h" +#include +#include namespace FlexFlow { -/* using mp = mpark; */ - -/* template */ -/* using variant = ::mpark::variant; */ - -using namespace ::mpark; - -/* template */ -/* using optional = ::tl::optional; */ - -/* template */ -/* using get = ::mpark::get; */ - -/* template */ -/* using holds_alternative = ::mpark::holds_alternative; */ - template struct pack_contains_all_of; @@ -35,14 +19,14 @@ template struct pack_contains_all_of> : std::false_type {}; template -struct pack_contains_all_of, Needles...> +struct pack_contains_all_of, Needles...> : pack_contains_all_of, Needles...> {}; template -bool is(variant const &v) { +bool is(std::variant const &v) { static_assert(pack_contains_all_of, T, TRest...>::value, ""); - return holds_alternative(v) || is(v); + return std::holds_alternative(v) || is(v); } /* template */ @@ -54,89 +38,90 @@ bool is(variant const &v) { /* using type = mpark::variant; */ /* }; */ template
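[Editor's illustration, not part of the patch: the utils/tuple.h hunk above switches the runtime-indexed tuple accessor from the vendored any to std::any. The sketch below shows one way such an accessor can work; runtime_get and its implementation are assumptions for illustration, not the FlexFlow visitor-based implementation.]

// C++17 sketch: fetch the idx-th tuple element at runtime as a std::any by
// expanding over all compile-time indices and copying out the matching one.
#include <any>
#include <cassert>
#include <cstddef>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>

template <typename... Ts, std::size_t... Is>
std::any runtime_get_impl(std::tuple<Ts...> const &t,
                          std::size_t idx,
                          std::index_sequence<Is...>) {
  std::any result;
  // Visit every index; only the one equal to idx assigns into result.
  (void)((idx == Is ? (result = std::get<Is>(t), 0) : 0), ...);
  return result;
}

template <typename... Ts>
std::any runtime_get(std::tuple<Ts...> const &t, std::size_t idx) {
  if (idx >= sizeof...(Ts)) {
    throw std::out_of_range("tuple index out of range");
  }
  return runtime_get_impl(t, idx, std::index_sequence_for<Ts...>{});
}

int main() {
  std::tuple<int, std::string> t{42, "hello"};
  assert(std::any_cast<int>(runtime_get(t, 0)) == 42);
  assert(std::any_cast<std::string>(runtime_get(t, 1)) == "hello");
}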