Merge branch 'peft' into peft_xinhao
goliaro authored Apr 8, 2024
2 parents dd1366f + 0ed889a commit 17257cd
Showing 64 changed files with 4,520 additions and 3,497 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-build-skip.yml
@@ -28,7 +28,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
cuda_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0"]
cuda_version: ["11.1", "11.6", "11.7", "11.8", "12.0", "12.1", "12.2"]
fail-fast: false
steps:
- run: 'echo "No docker-build required"'
12 changes: 6 additions & 6 deletions .github/workflows/docker-build.yml
@@ -103,27 +103,27 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
cuda_version: ["11.1", "11.2", "11.3", "11.4", "11.5", "11.6", "11.7", "11.8", "12.0"]
cuda_version: ["11.1", "11.6", "11.7", "11.8", "12.0", "12.1", "12.2"]
fail-fast: false
env:
FF_GPU_BACKEND: "cuda"
cuda_version: ${{ matrix.cuda_version }}
steps:
- name: Checkout Git Repository
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
uses: actions/checkout@v3
with:
submodules: recursive

- name: Free additional space on runner
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
run: .github/workflows/helpers/free_space_on_runner.sh

- name: Build Docker container
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
env:
deploy_needed: ${{ ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' }}
build_needed: ${{ matrix.cuda_version == '11.8' }}
build_needed: ${{ matrix.cuda_version == '12.0' }}
run: |
# On push to inference, build for all compatible architectures, so that we can publish
# a pre-built general-purpose image. In all other cases, only build for one architecture
@@ -137,7 +137,7 @@ jobs:
fi
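The body of this run block is collapsed in the diff view. A minimal bash sketch of the strategy described by the comment above might look like the following; the `FF_CUDA_ARCH` values and the `build.sh` invocation are assumptions for illustration, not lines taken from the workflow:

```
# Hedged sketch of the collapsed run block. deploy_needed and build_needed
# are the env flags defined for this step above.
if [[ "${deploy_needed}" == "true" ]]; then
  # Push to the inference branch: build for all compatible architectures,
  # so the published image is general-purpose.
  export FF_CUDA_ARCH=all
  ./docker/build.sh flexflow
elif [[ "${build_needed}" == "true" ]]; then
  # All other triggers: build for a single architecture to keep CI fast.
  export FF_CUDA_ARCH=86
  ./docker/build.sh flexflow
fi
```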
- name: Check availability of flexflow modules in Python
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '11.8' }}
if: ${{ ( ( github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' ) && github.ref_name == 'inference' ) || matrix.cuda_version == '12.0' }}
run: docker run --entrypoint /bin/bash flexflow-${FF_GPU_BACKEND}-${cuda_version}:latest -c "export LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH; sudo ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1; python -c 'import flexflow.core; import flexflow.serve as ff; exit()'"

- name: Publish Docker environment image (on push to inference)
21 changes: 2 additions & 19 deletions .github/workflows/gpu-ci.yml
@@ -1,25 +1,8 @@
name: "gpu-ci"
on:
pull_request:
paths:
- "cmake/**"
- "config/**"
- "deps/**"
- "python/**"
- "setup.py"
- "include/**"
- "inference/**"
- "src/**"
- "tests/inference/**"
- "conda/flexflow.yml"
- ".github/workflows/gpu-ci.yml"
- "tests/cpp_gpu_tests.sh"
- "tests/inference_tests.sh"
- "tests/training_tests.sh"
- "tests/python_interface_test.sh"
push:
branches:
- "master"
- "inference"
paths:
- "cmake/**"
- "config/**"
@@ -194,7 +177,7 @@ jobs:
- name: Save inference output as an artifact
if: always()
run: |
cd inference
tar -zcvf output.tar.gz ./output
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -558,6 +558,7 @@ if(NOT BUILD_LEGION_ONLY)
if(FF_BUILD_ALL_INFERENCE_EXAMPLES OR FF_BUILD_ALL_EXAMPLES)
add_subdirectory(inference/spec_infer)
add_subdirectory(inference/incr_decoding)
add_subdirectory(inference/peft)
endif()


2 changes: 1 addition & 1 deletion README.md
@@ -35,7 +35,7 @@ If you run into any issue during the install, or if you would like to use the C+
docker run --gpus all -it --rm --shm-size=8g ghcr.io/flexflow/flexflow-cuda-12.0:latest
```

To download a Docker container for a backend other than CUDA v12.0, you can replace the `cuda-12.0` suffix with any of the following backends: `cuda-11.1`, `cuda-11.2`, `cuda-11.3`, `cuda-11.4`, `cuda-11.5`, `cuda-11.6`, `cuda-11.7`, `cuda-11.8`, and `hip_rocm-5.3`, `hip_rocm-5.4`, `hip_rocm-5.5`, `hip_rocm-5.6`). More info on the Docker images, with instructions to build a new image from source, or run with additional configurations, can be found [here](./docker/README.md).
To download a Docker container for a backend other than CUDA v12.0, you can replace the `cuda-12.0` suffix with any of the following backends: `cuda-11.1`, `cuda-11.6`, `cuda-11.7`, `cuda-11.8`, `cuda-12.0`, `cuda-12.1`, `cuda-12.2`, and `hip_rocm-5.3`, `hip_rocm-5.4`, `hip_rocm-5.5`, `hip_rocm-5.6`. More info on the Docker images, with instructions to build a new image from source, or run with additional configurations, can be found [here](./docker/README.md).
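For example, the same run command shown above works with any of these suffixes; to use the CUDA 11.8 image instead:

```
docker run --gpus all -it --rm --shm-size=8g ghcr.io/flexflow/flexflow-cuda-11.8:latest
```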

### Build from source

15 changes: 13 additions & 2 deletions cmake/cuda.cmake
@@ -13,8 +13,19 @@ if(CUDA_FOUND)
# set cuda runtime and driver lib
# override cublas and curand because the FindCUDA module may not find the correct libs
set(CUDADRV_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libcuda${LIBEXT})
set(CUDA_CUBLAS_LIBRARIES ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas${LIBEXT})
set(CUDA_curand_LIBRARY ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand${LIBEXT})
if(CUBLAS_PATH)
set(CUBLAS_ROOT ${CUBLAS_PATH})
else()
set(CUBLAS_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
endif()
set(CUDA_CUBLAS_LIBRARIES ${CUBLAS_ROOT}/lib64/libcublas${LIBEXT})
if(CURAND_PATH)
set(CURAND_ROOT ${CURAND_PATH})
else()
set(CURAND_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
endif()
set(CUDA_curand_LIBRARY ${CURAND_ROOT}/lib64/libcurand${LIBEXT})

list(APPEND FLEXFLOW_EXT_LIBRARIES
${CUDADRV_LIBRARIES}
${CUDA_CUBLAS_LIBRARIES}
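The effect of this change is that cuBLAS and cuRAND installations living outside the CUDA toolkit root can now be passed explicitly at configure time. A sketch, with placeholder paths:

```
# Configure FlexFlow against cuBLAS/cuRAND located outside the CUDA
# toolkit root; the /opt paths below are illustrative placeholders.
cmake .. -DCUBLAS_PATH=/opt/cublas -DCURAND_PATH=/opt/curand
```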
12 changes: 11 additions & 1 deletion config/config.inc
@@ -62,6 +62,16 @@ if [ -n "$CUDA_DIR" ]; then
SET_CUDA_LIB_PATH="CUDA_PATH=${CUDA_PATH}"
fi

# set cublas dir
if [ -n "$CUBLAS_DIR" ]; then
SET_CUBLAS="-DCUBLAS_PATH=${CUBLAS_DIR}"
fi

# set curand dir
if [ -n "$CURAND_DIR" ]; then
SET_CURAND="-DCURAND_PATH=${CURAND_DIR}"
fi

# set cudnn dir
if [ -n "$CUDNN_DIR" ]; then
SET_CUDNN="-DCUDNN_PATH=${CUDNN_DIR}"
@@ -231,7 +241,7 @@ fi
fi
fi

CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_INFERENCE_TESTS} ${SET_LIBTORCH_PATH} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUDNN} ${SET_HIP_ARCH} ${SET_PYTHON} ${SET_BUILD_LEGION_ONLY} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_UCX} ${SET_EXAMPLES} ${SET_INFERENCE_EXAMPLES} ${SET_USE_PREBUILT_LEGION} ${SET_USE_PREBUILT_NCCL} ${SET_USE_ALL_PREBUILT_LIBRARIES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_LEGION_MAX_RETURN_SIZE} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"
CMAKE_FLAGS="-DCUDA_USE_STATIC_CUDA_RUNTIME=OFF -DLegion_HIJACK_CUDART=OFF ${SET_CC} ${SET_CXX} ${SET_INSTALL_DIR} ${SET_INFERENCE_TESTS} ${SET_LIBTORCH_PATH} ${SET_BUILD} ${SET_CUDA_ARCH} ${SET_CUDA} ${SET_CUBLAS} ${SET_CURAND} ${SET_CUDNN} ${SET_HIP_ARCH} ${SET_PYTHON} ${SET_BUILD_LEGION_ONLY} ${SET_NCCL} ${SET_NCCL_DIR} ${SET_LEGION_NETWORKS} ${SET_UCX} ${SET_EXAMPLES} ${SET_INFERENCE_EXAMPLES} ${SET_USE_PREBUILT_LEGION} ${SET_USE_PREBUILT_NCCL} ${SET_USE_ALL_PREBUILT_LIBRARIES} ${SET_BUILD_UNIT_TESTS} ${SET_AVX2} ${SET_MAX_DIM} ${SET_LEGION_MAX_RETURN_SIZE} ${SET_ROCM_PATH} ${SET_FF_GPU_BACKEND}"

function run_cmake() {
SRC_LOCATION=${SRC_LOCATION:=`dirname $0`/../}
14 changes: 10 additions & 4 deletions config/config.linux
@@ -36,12 +36,18 @@ FF_CUDA_ARCH=${FF_CUDA_ARCH:-"autodetect"}
# or all available architectures. TODO: support autodetect
FF_HIP_ARCH=${FF_HIP_ARCH:-"all"}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# set CUDA dir in case cmake cannot autodetect a path
CUDA_DIR=${CUDA_DIR:-"/usr/local/cuda"}

# set CUBLAS dir in case it is not stored in the CUDA DIR
CUBLAS_DIR=${CUBLAS_DIR:-"/usr/local/cuda"}

# set CURAND dir in case it is not stored in the CUDA DIR
CURAND_DIR=${CURAND_DIR:-"/usr/local/cuda"}

# set CUDNN dir in case cmake cannot autodetect a path
CUDNN_DIR=${CUDNN_DIR:-"/usr/local/cuda"}

# if not use PREBUILD_NCCL, you can set NCCL_DIR to use external nccl lib,
# otherwise, we will build nccl from source
NCCL_DIR=${NCCL_DIR:-"/usr/local/cuda"}
@@ -102,7 +108,7 @@ fi

function get_build_configs() {
# Create a string with the values of the variables set in this script
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDNN_DIR=${CUDNN_DIR} CUDA_DIR=${CUDA_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
BUILD_CONFIGS="FF_CUDA_ARCH=${FF_CUDA_ARCH} FF_HIP_ARCH=${FF_HIP_ARCH} CUDA_DIR=${CUDA_DIR} CUDNN_DIR=${CUDNN_DIR} CUBLAS_DIR=${CUBLAS_DIR} CURAND_DIR=${CURAND_DIR} NCCL_DIR=${NCCL_DIR} FF_USE_PYTHON=${FF_USE_PYTHON} BUILD_LEGION_ONLY=${BUILD_LEGION_ONLY} FF_GASNET_CONDUIT=${FF_GASNET_CONDUIT} UCX_DIR=${UCX_DIR} FF_LEGION_NETWORKS=${FF_LEGION_NETWORKS} FF_BUILD_ALL_EXAMPLES=${FF_BUILD_ALL_EXAMPLES} FF_BUILD_ALL_INFERENCE_EXAMPLES=${FF_BUILD_ALL_INFERENCE_EXAMPLES} FF_BUILD_UNIT_TESTS=${FF_BUILD_UNIT_TESTS} FF_USE_PREBUILT_NCCL=${FF_USE_PREBUILT_NCCL} FF_USE_PREBUILT_LEGION=${FF_USE_PREBUILT_LEGION} FF_USE_ALL_PREBUILT_LIBRARIES=${FF_USE_ALL_PREBUILT_LIBRARIES} FF_USE_AVX2=${FF_USE_AVX2} FF_MAX_DIM=${FF_MAX_DIM} ROCM_PATH=${ROCM_PATH} FF_GPU_BACKEND=${FF_GPU_BACKEND} INSTALL_DIR=${INSTALL_DIR}"
}

if [[ -n "$1" && ( "$1" == "CMAKE_FLAGS" || "$1" == "CUDA_PATH" ) ]]; then
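Combined with the `config.inc` change above, these new variables flow from the shell environment into the CMake flags, so a build can pick up cuBLAS/cuRAND from a non-default location without invoking CMake directly. For example, with placeholder paths:

```
# config.inc translates these env vars into -DCUBLAS_PATH / -DCURAND_PATH.
CUBLAS_DIR=/opt/cublas CURAND_DIR=/opt/curand ./config/config.linux
```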
6 changes: 3 additions & 3 deletions docker/README.md
@@ -7,7 +7,7 @@ You can build and run the FlexFlow Docker images on any machine, but if you want
## Downloading a pre-built package
The fastest way to run FlexFlow is to use one of the pre-built containers, which we update for each commit to the `inference` branch (the `inference` branch is currently ahead of the `master` branch). The available containers are the following, and can be found [at this link](https://github.com/orgs/flexflow/packages?repo_name=FlexFlow):

* `flexflow`: the pre-built version of FlexFlow. We currently publish four version targeting AMD GPUs (ROCm versions: 5.3, 5.4, 5.5 and 5.6 ), and several versions for CUDA GPUs (CUDA versions: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8 and 12.0). The CUDA images are named `flexflow-<GPU backend>-<GPU software version>`, e.g. [flexflow-hip_rocm-5.6](https://github.com/flexflow/FlexFlow/pkgs/container/flexflow-hip_rocm-5.6) or [flexflow-cuda-12.0](https://github.com/orgs/flexflow/packages/container/package/flexflow-cuda-12.0) or
* `flexflow`: the pre-built version of FlexFlow. We currently publish four versions targeting AMD GPUs (ROCm versions: 5.3, 5.4, 5.5, and 5.6), and several versions for CUDA GPUs (CUDA versions: 11.1, 11.6, 11.7, 11.8, 12.0, 12.1, and 12.2). The images are named `flexflow-<GPU backend>-<GPU software version>`, e.g. [flexflow-hip_rocm-5.6](https://github.com/flexflow/FlexFlow/pkgs/container/flexflow-hip_rocm-5.6) or [flexflow-cuda-12.0](https://github.com/orgs/flexflow/packages/container/package/flexflow-cuda-12.0).
* `flexflow-environment`: this is the base layer for `flexflow`. These packages are used in CI or for internal use, and contain all the dependencies needed to build/run FlexFlow. You may find them useful if you want to build FlexFlow yourself. We also publish four versions of `flexflow-environment` for AMD GPUs and, for NVIDIA GPUs, one for each CUDA version in the list above. The naming convention is similar, too. For example, the `flexflow-environment` image for CUDA 12.0 is tagged [flexflow-environment-cuda-12.0](https://github.com/orgs/flexflow/packages/container/package/flexflow-environment-cuda-12.0).

The easiest way to download any of the Docker containers above is to call:
@@ -19,7 +19,7 @@ The easiest way to download any of the Docker containers above is to call:
where `CONTAINER_NAME` is `flexflow` (or `flexflow-environment`). By default, the script will assume an NVIDIA backend and attempt to detect the CUDA version on your machine, in order to download the relevant container. If your machine has AMD GPUs, or no GPUs, or if you want to specify the CUDA/ROCm version to download, set the environment variables below:

* `FF_GPU_BACKEND` (supported options: `cuda`, `hip_rocm`) to specify the GPU backend of the Docker container to be downloaded.
* `cuda_version` (supported options: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8 and 12.0) to specify the CUDA version, when using a `cuda` backend. If `FF_GPU_BACKEND` is set to `hip_rocm`, the `cuda_version` env will be ignored
* `cuda_version` (supported options: 11.1, 11.6, 11.7, 11.8, 12.0, 12.1 and 12.2) to specify the CUDA version, when using a `cuda` backend. If `FF_GPU_BACKEND` is set to `hip_rocm`, the `cuda_version` env will be ignored
* `hip_version` (supported options: 5.3, 5.4, 5.5, 5.6) to specify the ROCm version, when using a HIP backend. If `FF_GPU_BACKEND` is set to `cuda`, the `hip_version` env will be ignored.
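For instance, assuming the elided snippet above invokes `docker/pull.sh` with the container name as its argument, an explicit download of the CUDA 11.8 image might look like:

```
# Download the pre-built flexflow image for CUDA 11.8.
FF_GPU_BACKEND=cuda cuda_version=11.8 ./docker/pull.sh flexflow
```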


@@ -44,7 +44,7 @@ If you only want to build the `flexflow-environment` image (the base layers of t
After having either built or downloaded a Docker container by following the instructions above, you can run it with the following command (the image name argument of the run script can be omitted). Once again, you can set the optional `FF_GPU_BACKEND`, `cuda_version`, and `hip_version` environment variables to run the Docker image with the desired GPU backend and CUDA/HIP version:

* `FF_GPU_BACKEND` (supported options: `cuda`, `hip_rocm`) to specify the GPU backend of the Docker container to be run.
* `cuda_version` (supported options: 11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8 and 12.0) to specify the CUDA version, when using a `cuda` backend. If `FF_GPU_BACKEND` is set to `hip_rocm`, the `cuda_version` env will be ignored
* `cuda_version` (supported options: 11.1, 11.6, 11.7, 11.8, 12.0, 12.1, 12.2) to specify the CUDA version, when using a `cuda` backend. If `FF_GPU_BACKEND` is set to `hip_rocm`, the `cuda_version` env will be ignored
* `hip_version` (supported options: 5.3, 5.4, 5.5, 5.6) to specify the ROCm version, when using a HIP backend. If `FF_GPU_BACKEND` is set to `cuda`, the `hip_version` env will be ignored.

Leaving these variables unset will assume a CUDA backend, and instruct the script to autodetect the CUDA version installed on the current machine and run the Docker container with it, if available.
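As with the pull script, an explicit invocation might look like the following, assuming `docker/run.sh` takes the image name as its argument:

```
# Run the flexflow image built for ROCm 5.6.
FF_GPU_BACKEND=hip_rocm hip_version=5.6 ./docker/run.sh flexflow
```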
12 changes: 6 additions & 6 deletions docker/build.sh
@@ -50,20 +50,20 @@ if [[ "${FF_GPU_BACKEND}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; the
# Check that CUDA version is supported, and modify cuda version to include default subsubversion
if [[ "$cuda_version" == @(11.1|11.3|11.7|12.0|12.1) ]]; then
cuda_version_input=${cuda_version}.1
elif [[ "$cuda_version" == @(11.2|11.5|11.6) ]]; then
elif [[ "$cuda_version" == @(11.2|11.5|11.6|12.2) ]]; then
cuda_version_input=${cuda_version}.2
elif [[ "$cuda_version" == @(11.4) ]]; then
cuda_version_input=${cuda_version}.3
elif [[ "$cuda_version" == @(11.8|12.2) ]]; then
elif [[ "$cuda_version" == @(11.8) ]]; then
cuda_version_input=${cuda_version}.0
else
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2}"
exit 1
fi
# Use CUDA 12.0 for all versions greater or equal to 12.0 for now
if [[ "$cuda_version" == @(12.1|12.2|12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.0
cuda_version_input=${cuda_version}.1
# Use CUDA 12.2 for all versions greater or equal to 12.2 for now (the Docker machine with CUDNN is not yet available)
if [[ "$cuda_version" == @(12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.2
cuda_version_input=${cuda_version}.2
fi
echo "Building $image docker image with CUDA $cuda_version"
ff_environment_base_image="nvidia/cuda:${cuda_version_input}-cudnn8-devel-ubuntu20.04"
10 changes: 5 additions & 5 deletions docker/pull.sh
@@ -45,13 +45,13 @@ if [[ "${FF_GPU_BACKEND}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; the
fi
fi
# Check that CUDA version is supported
if [[ "$cuda_version" != @(11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2) ]]; then
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2}"
if [[ "$cuda_version" != @(11.1|11.6|11.7|11.8|12.0|12.1|12.2) ]]; then
echo "cuda_version is not available for download, please choose among {11.1|11.6|11.7|11.8|12.0|12.1|12.2}"
exit 1
fi
# Use CUDA 12.0 for all versions greater or equal to 12.0 for now
if [[ "$cuda_version" == @(12.1|12.2|12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.0
# Use CUDA 12.2 for all versions greater or equal to 12.2 for now
if [[ "$cuda_version" == @(12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.2
fi
# Set cuda version suffix to docker image name
echo "Downloading $image docker image with CUDA $cuda_version"
6 changes: 3 additions & 3 deletions docker/run.sh
@@ -62,9 +62,9 @@ if [[ "${FF_GPU_BACKEND}" == "cuda" || "${FF_GPU_BACKEND}" == "hip_cuda" ]]; the
echo "cuda_version is not supported, please choose among {11.1|11.2|11.3|11.4|11.5|11.6|11.7|11.8|12.0|12.1|12.2}"
exit 1
fi
# Use CUDA 12.0 for all versions greater or equal to 12.0 for now
if [[ "$cuda_version" == @(12.1|12.2|12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.0
# Use CUDA 12.2 for all versions greater or equal to 12.2 for now
if [[ "$cuda_version" == @(12.3|12.4|12.5|12.6|12.7|12.8|12.9) ]]; then
cuda_version=12.2
fi
# Set cuda version suffix to docker image name
echo "Running $image docker image with CUDA $cuda_version"
1 change: 1 addition & 0 deletions include/flexflow/batch_config.h
@@ -51,6 +51,7 @@ class BatchConfig {
static int max_requests_per_batch();
static int max_tokens_per_batch();
static int max_verify_tokens_per_batch();
static int max_spec_tree_token_num();
static int max_sequence_length();
friend std::ostream &operator<<(std::ostream &os, BatchConfig const &bc);
void print() const;
2 changes: 1 addition & 1 deletion include/flexflow/config.h
@@ -155,7 +155,7 @@ class FFConfig {
Legion::Runtime *lg_hlr;
Legion::IndexSpaceT<1> all_gpu_task_is;
// Legion::FieldSpace field_space;
bool syntheticInput, profiling, perform_fusion;
bool benchmarking, profiling, perform_fusion;
bool inference_debugging;
size_t simulator_work_space_size;
size_t search_budget;
8 changes: 6 additions & 2 deletions include/flexflow/ffconst.h
@@ -78,6 +78,11 @@ enum InferenceMode {
TREE_VERIFY_MODE = 2003,
};

enum RequestType {
REQ_INFERENCE = 4001,
REQ_FINETUNING = 4002,
};

// This is consistent with TASO's OpType
// https://github.com/jiazhihao/TASO/blob/master/include/taso/ops.h#L75-L138
enum OperatorType {
@@ -179,8 +184,7 @@ enum OperatorType {
OP_TREE_INC_MULTIHEAD_SELF_ATTENTION,
OP_SAMPLING,
// PEFT Ops
OP_LORA_MLP_FIRST,
OP_LORA_MLP_SECOND,
OP_LORA,
// Parallel Ops
OP_REPARTITION,
OP_COMBINE,
(Diffs for the remaining changed files were not loaded.)