Skip to content

Commit

Permalink
Merge branch 'inference' into optimize_attn_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
xinhaoc authored Oct 24, 2023
2 parents 5d2dbbd + 1105f4e commit 20b2b2b
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 139 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/helpers/prebuild_legion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ else
echo "Pre-building Legion with GPU backend: ${gpu_backend}"
fi

if [[ "${gpu_backend}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; then
if [[ "${gpu_backend}" == "cuda" || "${gpu_backend}" == "hip_cuda" ]]; then
# Check that the CUDA version is supported. Versions above 12.0 are not supported because we don't publish docker images for them yet.
if [[ "$gpu_backend_version" != @(11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0) ]]; then
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0}"
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/prebuild-legion.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
matrix:
gpu_backend: ["cuda", "hip_rocm"]
gpu_backend_version: ["11.8", "5.6"]
python_version: "3.11"
python_version: ["3.11"]
exclude:
- gpu_backend: "cuda"
gpu_backend_version: "5.6"
Expand All @@ -42,12 +42,12 @@ jobs:

- name: Build Legion
env:
FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
gpu_backend: ${{ matrix.gpu_backend }}
gpu_backend_version: ${{ matrix.gpu_backend_version }}
python_version: ${{ matrix.python_version }}
run: .github/workflows/helpers/prebuild_legion.sh

- name: Archive compiled Legion library (CUDA)
env:
FF_GPU_BACKEND: ${{ matrix.gpu_backend }}
uses: actions/upload-artifact@v3
with:
name: legion_ubuntu-20.04_${{ matrix.gpu_backend }}-${{ matrix.gpu_backend_version }}_py${{ matrix.python_version }}
Expand Down
260 changes: 130 additions & 130 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -504,156 +504,156 @@ if(NOT BUILD_LEGION_ONLY)
install(PROGRAMS ${CMAKE_BINARY_DIR}/flexflow_python DESTINATION "bin")
endif()
endif()
endif()

if (INFERENCE_TESTS)
target_link_libraries(flexflow "${TORCH_LIBRARIES}")
set_property(TARGET flexflow PROPERTY CXX_STANDARD 14)
endif()

# build binary
option(FF_BUILD_TOKENIZER "build tokenizer=cpp for LLM serving" ON)
option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
option(FF_BUILD_DLRM "build DLRM example" OFF)
option(FF_BUILD_XDL "build XDL example" OFF)
option(FF_BUILD_INCEPTION "build inception example" OFF)
option(FF_BUILD_CANDLE_UNO "build candle uno example" OFF)
option(FF_BUILD_TRANSFORMER "build transformer example" OFF)
option(FF_BUILD_MOE "build mixture of experts example" OFF)
option(FF_BUILD_MLP_UNIFY "build mlp unify example" OFF)
option(FF_BUILD_SPLIT_TEST "build split test example" OFF)
option(FF_BUILD_SPLIT_TEST_2 "build split test 2 example" OFF)
option(FF_BUILD_MLP_UNIFY_INFERENCE "build mlp unify inference example" OFF)
option(FF_BUILD_ALL_INFERENCE_EXAMPLES "build all inference examples. Overrides others" OFF)
option(FF_BUILD_ALL_EXAMPLES "build all examples. Overrides others" OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)

if(FF_BUILD_UNIT_TESTS)
set(BUILD_GMOCK OFF)
add_subdirectory(deps/googletest)
enable_testing()
add_subdirectory(tests/unit)
endif()

if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(tools/protobuf_to_json)

if (INFERENCE_TESTS)
target_link_libraries(flexflow "${TORCH_LIBRARIES}")
set_property(TARGET flexflow PROPERTY CXX_STANDARD 14)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(tools/substitutions_to_dot)
# build binary
option(FF_BUILD_TOKENIZER "build tokenizer=cpp for LLM serving" ON)
option(FF_BUILD_RESNET "build resnet example" OFF)
option(FF_BUILD_RESNEXT "build resnext example" OFF)
option(FF_BUILD_ALEXNET "build alexnet example" OFF)
option(FF_BUILD_DLRM "build DLRM example" OFF)
option(FF_BUILD_XDL "build XDL example" OFF)
option(FF_BUILD_INCEPTION "build inception example" OFF)
option(FF_BUILD_CANDLE_UNO "build candle uno example" OFF)
option(FF_BUILD_TRANSFORMER "build transformer example" OFF)
option(FF_BUILD_MOE "build mixture of experts example" OFF)
option(FF_BUILD_MLP_UNIFY "build mlp unify example" OFF)
option(FF_BUILD_SPLIT_TEST "build split test example" OFF)
option(FF_BUILD_SPLIT_TEST_2 "build split test 2 example" OFF)
option(FF_BUILD_MLP_UNIFY_INFERENCE "build mlp unify inference example" OFF)
option(FF_BUILD_ALL_INFERENCE_EXAMPLES "build all inference examples. Overrides others" OFF)
option(FF_BUILD_ALL_EXAMPLES "build all examples. Overrides others" OFF)
option(FF_BUILD_UNIT_TESTS "build non-operator unit tests" OFF)
option(FF_BUILD_SUBSTITUTION_TOOL "build substitution conversion tool" OFF)
option(FF_BUILD_VISUALIZATION_TOOL "build substitution visualization tool" OFF)

if(FF_BUILD_UNIT_TESTS)
set(BUILD_GMOCK OFF)
add_subdirectory(deps/googletest)
enable_testing()
add_subdirectory(tests/unit)
endif()

if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_TOKENIZER)
if (FF_GPU_BACKEND STREQUAL "hip_rocm")
SET(SPM_USE_BUILTIN_PROTOBUF OFF CACHE BOOL "Use builtin version of protobuf to compile SentencePiece")
endif()
# Ensure Rust is installed
execute_process(COMMAND rustc --version
RESULT_VARIABLE RUST_COMMAND_RESULT
OUTPUT_VARIABLE RUSTC_OUTPUT
ERROR_QUIET)
if(NOT RUST_COMMAND_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is not installed on the system. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
if(FF_BUILD_SUBSTITUTION_TOOL)
add_subdirectory(tools/protobuf_to_json)
endif()

if(FF_BUILD_VISUALIZATION_TOOL)
add_subdirectory(tools/substitutions_to_dot)
endif()

if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_TOKENIZER)
if (FF_GPU_BACKEND STREQUAL "hip_rocm")
SET(SPM_USE_BUILTIN_PROTOBUF OFF CACHE BOOL "Use builtin version of protobuf to compile SentencePiece")
endif()
# Ensure Rust is installed
execute_process(COMMAND rustc --version
RESULT_VARIABLE RUST_COMMAND_RESULT
OUTPUT_VARIABLE RUSTC_OUTPUT
ERROR_QUIET)
if(NOT RUST_COMMAND_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is not installed on the system. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
endif()
# Ensure Cargo is installed
execute_process(COMMAND cargo --version
RESULT_VARIABLE CARGO_RESULT
OUTPUT_QUIET ERROR_QUIET)
if(NOT CARGO_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is installed, but cargo is not. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
endif()
add_subdirectory(deps/tokenizers-cpp tokenizers EXCLUDE_FROM_ALL)
target_include_directories(flexflow PUBLIC deps/tokenizers-cpp/include)
target_link_libraries(flexflow tokenizers_cpp)
endif()
# Ensure Cargo is installed
execute_process(COMMAND cargo --version
RESULT_VARIABLE CARGO_RESULT
OUTPUT_QUIET ERROR_QUIET)
if(NOT CARGO_RESULT EQUAL 0)
message(FATAL_ERROR "Rust is installed, but cargo is not. Please install it by running: 'curl https://sh.rustup.rs -sSf | sh -s -- -y' and following the instructions on the screen.")
if(FF_BUILD_RESNET OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/ResNet)
endif()
add_subdirectory(deps/tokenizers-cpp tokenizers EXCLUDE_FROM_ALL)
target_include_directories(flexflow PUBLIC deps/tokenizers-cpp/include)
target_link_libraries(flexflow tokenizers_cpp)
endif()
if(FF_BUILD_RESNET OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/ResNet)
endif()

if(FF_BUILD_RESNEXT OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/resnext50)
endif()
if(FF_BUILD_RESNEXT OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/resnext50)
endif()

if(FF_BUILD_ALEXNET OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/AlexNet)
endif()
if(FF_BUILD_ALEXNET OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/AlexNet)
endif()

if(FF_BUILD_MLP_UNIFY OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/MLP_Unify)
endif()
if(FF_BUILD_MLP_UNIFY OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/MLP_Unify)
endif()

if(FF_BUILD_SPLIT_TEST OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test)
endif()
if(FF_BUILD_SPLIT_TEST OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test)
endif()

if(FF_BUILD_SPLIT_TEST_2 OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test_2)
endif()
if(FF_BUILD_SPLIT_TEST_2 OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/split_test_2)
endif()

if(FF_BUILD_INCEPTION OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/InceptionV3)
endif()
if(FF_BUILD_INCEPTION OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/InceptionV3)
endif()

#TODO: Once functional add to BUILD_ALL_EXAMPLES
if(FF_BUILD_CANDLE_UNO OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/candle_uno)
endif()
#TODO: Once functional add to BUILD_ALL_EXAMPLES
if(FF_BUILD_CANDLE_UNO OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/candle_uno)
endif()

if(FF_BUILD_DLRM OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/DLRM)
if(FF_BUILD_DLRM OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/DLRM)

#add_executable(generate_dlrm_hetero_strategy src/runtime/dlrm_strategy_hetero.cc)
#target_include_directories(generate_dlrm_hetero_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
#add_executable(generate_dlrm_hetero_strategy src/runtime/dlrm_strategy_hetero.cc)
#target_include_directories(generate_dlrm_hetero_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})

#add_executable(generate_dlrm_strategy src/runtime/dlrm_strategy.cc)
#target_include_directories(generate_dlrm_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
endif()
#add_executable(generate_dlrm_strategy src/runtime/dlrm_strategy.cc)
#target_include_directories(generate_dlrm_strategy PUBLIC ${FLEXFLOW_INCLUDE_DIRS})
endif()

if(FF_BUILD_XDL OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/XDL)
endif()
if(FF_BUILD_XDL OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/XDL)
endif()

if(FF_BUILD_TRANSFORMER OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/Transformer)
endif()
if(FF_BUILD_TRANSFORMER OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/Transformer)
endif()

if(FF_BUILD_MOE OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/mixture_of_experts)
endif()
if(FF_BUILD_MOE OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(examples/cpp/mixture_of_experts)
endif()

if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(inference/spec_infer)
add_subdirectory(inference/incr_decoding)
endif()
if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(inference/spec_infer)
add_subdirectory(inference/incr_decoding)
endif()


# installation
set(INCLUDE_DEST "include")
set(LIB_DEST "lib")
install(FILES ${FLEXFLOW_HDR} DESTINATION ${INCLUDE_DEST})
install(TARGETS flexflow DESTINATION ${LIB_DEST})
# install python
if (FF_USE_PYTHON)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT FF_BUILD_FROM_PYPI)
install(
DIRECTORY ${FLEXFLOW_ROOT}/python/flexflow/
DESTINATION ${PY_DEST}/flexflow
FILES_MATCHING
PATTERN "*.py")
else()
# pip automatically installs all *.py files in the python/flexflow folder, but because flexflow_cffi_header.py is generated at build time, we have to install it manually.
install(
PROGRAMS ${FLEXFLOW_ROOT}/python/flexflow/core/flexflow_cffi_header.py
DESTINATION ${PY_DEST}/flexflow/core
)
# Use setup.py script to re-install the Python bindings library with the right library paths.
# Need to put the instructions in a subfolder because of the issue described below:
# https://stackoverflow.com/questions/43875499/do-post-processing-after-make-install-in-cmake
add_subdirectory(cmake/pip_install)
# installation
set(INCLUDE_DEST "include")
set(LIB_DEST "lib")
install(FILES ${FLEXFLOW_HDR} DESTINATION ${INCLUDE_DEST})
install(TARGETS flexflow DESTINATION ${LIB_DEST})
# install python
if (FF_USE_PYTHON)
execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import site, os; print([pkg for func in (site.getsitepackages(), site.getusersitepackages()) for pkg in ([func] if isinstance(func, str) else func) if os.access(pkg, os.W_OK)][0])" OUTPUT_VARIABLE PY_DEST OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT FF_BUILD_FROM_PYPI)
install(
DIRECTORY ${FLEXFLOW_ROOT}/python/flexflow/
DESTINATION ${PY_DEST}/flexflow
FILES_MATCHING
PATTERN "*.py")
else()
# pip automatically installs all *.py files in the python/flexflow folder, but because flexflow_cffi_header.py is generated at build time, we have to install it manually.
install(
PROGRAMS ${FLEXFLOW_ROOT}/python/flexflow/core/flexflow_cffi_header.py
DESTINATION ${PY_DEST}/flexflow/core
)
# Use setup.py script to re-install the Python bindings library with the right library paths.
# Need to put the instructions in a subfolder because of the issue described below:
# https://stackoverflow.com/questions/43875499/do-post-processing-after-make-install-in-cmake
add_subdirectory(cmake/pip_install)
endif()
endif()
endif()
endif() # if(NOT BUILD_LEGION_ONLY)
8 changes: 6 additions & 2 deletions cmake/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,12 @@ if(CUDA_FOUND)
message( STATUS "CUDA Detected CUDA_ARCH : ${DETECTED_CUDA_ARCH}" )
set(FF_CUDA_ARCH ${DETECTED_CUDA_ARCH})
# Set FF_CUDA_ARCH to the list of all GPU architectures compatible with FlexFlow
elseif("${FF_CUDA_ARCH}" STREQUAL "all")
set(FF_CUDA_ARCH 60,61,62,70,72,75,80,86,90)
elseif("${FF_CUDA_ARCH}" STREQUAL "all")
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
set(FF_CUDA_ARCH 60,61,62,70,72,75,80,86,90)
else()
set(FF_CUDA_ARCH 60,61,62,70,72,75,80,86)
endif()
endif()

# create CUDA_GENCODE list based on FF_CUDA_ARCH
Expand Down
4 changes: 2 additions & 2 deletions config/config.linux
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#LD_FLAGS=${LD_FLAGS+=""}

#set install dir
#INSTALL_DIR=
INSTALL_DIR=${INSTALL_DIR:-}

# set build type
BUILD_TYPE=${BUILD_TYPE:-Release}
Expand Down Expand Up @@ -100,7 +100,7 @@ fi

function get_build_configs() {
# Create a string with the values of the variables set in this script
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDNN_DIR=${CUDNN_DIR} CUDA_DIR=${CUDA_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} FF_UCX_URL=${FF_UCX_URL} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND}"
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDNN_DIR=${CUDNN_DIR} CUDA_DIR=${CUDA_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} FF_UCX_URL=${FF_UCX_URL} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
}

if [[ -n "$1" && ( "$1" == "CMAKE_FLAGS" || "$1" == "CUDA_PATH" ) ]]; then
Expand Down

0 comments on commit 20b2b2b

Please sign in to comment.