gordicaleksa · vienneraphael · Jul 6, 2022 · Jul 7, 2022 · Jul 7, 2022 · Jul 7, 2022
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -6,9 +6,9 @@ version: 2.1
 # -------------------------------------------------------------------------------------
 gpu: &gpu
   environment:
-    CUDA_VERSION: "11.1"
+    CUDA_VERSION: "11.2"
   machine:
-    image: ubuntu-1604-cuda-11.1:202012-01
+    image: ubuntu-2004-cuda-11.2:202103-01
   resource_class: gpu.nvidia.medium.multi
 
 
@@ -23,10 +23,11 @@ install_dep_common: &install_dep_common
       command: |
         source activate fairseq
         pip install --upgrade setuptools
-        pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.0.7 omegaconf==2.0.6
+        pip install bitarray boto3 deepspeed editdistance fastBPE iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt hydra-core==1.2.0 omegaconf==2.2.2
+        pip install statsmodels==0.12.2 more_itertools submitit boto3 editdistance transformers scikit-learn scipy cython Jinja2==2.11.3  # scikit-learn: the "sklearn" PyPI alias is deprecated and fails to install
         pip install --progress-bar off pytest
         pip install --progress-bar off fairscale
-        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda111 -U
+        pip install -i https://test.pypi.org/simple/ bitsandbytes-cuda112 -U
         python -c 'import torch; print("Torch version:", torch.__version__)'
         python -m torch.utils.collect_env
 
@@ -36,40 +37,45 @@ install_dep_fused_ops: &install_dep_fused_ops
       working_directory: ~/
       command: |
         source activate fairseq
-        git clone https://github.com/NVIDIA/apex
-        cd apex
-        git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
-        pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
-        cd ~/
-        git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
-        cd Megatron-LM
-        pip install -e .
-
+        if [ ! -d "apex" ]; then
+          git clone https://github.com/NVIDIA/apex
+          cd apex
+          git checkout e2083df5eb96643c61613b9df48dd4eea6b07690
+          sed -i '101,107 s/^/#/' setup.py
+          pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--deprecated_fused_adam" --global-option="--xentropy" --global-option="--fast_multihead_attn" ./
+          cd ~/
+        fi
+        if [ ! -d "Megatron-LM" ]; then
+          git clone --depth=1 --branch v2.4 https://github.com/NVIDIA/Megatron-LM.git
+          cd Megatron-LM
+          pip install -e .
+          cd ~/
+        fi
 
-install_dep_pt19: &install_dep_pt19
+install_dep_pt110: &install_dep_pt110
   - run:
       name: Install Pytorch Dependencies
       command: |
         source activate fairseq
         pip install --upgrade setuptools
-        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.10.1+cu111 torchvision==0.11.2+cu111 torchaudio==0.10.1 -f https://download.pytorch.org/whl/torch_stable.html
         python -c 'import torch; print("Torch version:", torch.__version__)'
 
-install_dep_pt18: &install_dep_pt18
+install_dep_pt19: &install_dep_pt19
   - run:
       name: Install Pytorch Dependencies
       command: |
         source activate fairseq
         pip install --upgrade setuptools
-        pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
+        pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
         python -c 'import torch; print("Torch version:", torch.__version__)'
 
 install_repo: &install_repo
   - run:
       name: Install Repository
       command: |
         source activate fairseq
-        pip install .
+        pip install -e .
         python setup.py build_ext --inplace
 
 run_unittests: &run_unittests
@@ -130,7 +136,7 @@ jobs:
       - <<: *install_repo
       - <<: *run_unittests
 
-  gpu_tests_pt18:
+  gpu_tests_pt110:
     <<: *gpu
 
     working_directory: ~/fairseq-py
@@ -141,7 +147,7 @@ jobs:
       - <<: *create_conda_env
       - restore_cache:
           key: *cache_key
-      - <<: *install_dep_pt18
+      - <<: *install_dep_pt110
       - <<: *install_dep_common
       - <<: *install_dep_fused_ops
       - save_cache:
@@ -155,5 +161,5 @@ workflows:
   version: 2
   build:
     jobs:
-      - gpu_tests_pt18
       - gpu_tests_pt19
+      - gpu_tests_pt110
diff --git a/.github/workflows/cpu_tests.yml b/.github/workflows/cpu_tests.yml
@@ -0,0 +1,63 @@
+name: cpu_tests
+
+on: [push, pull_request]
+
+jobs:
+  unittest:
+
+    strategy:
+      fail-fast: false
+      max-parallel: 12
+      matrix:
+        platform: [ubuntu-latest, macos-latest]
+        python-version: [3.8, 3.9]
+
+    runs-on: ${{ matrix.platform }}
+
+    steps:
+    - name: Checkout branch 🛎️
+      uses: actions/checkout@v2
+
+    - name: Setup Conda Environment
+      uses: conda-incubator/setup-miniconda@v2
+      with:
+        activate-environment: fairseq
+        python-version: ${{ matrix.python-version }}
+        auto-update-conda: true
+        use-only-tar-bz2: true
+
+    - name: Cache Conda Environment
+      uses: actions/cache@v2
+      env:
+        # Increase this value to reset cache if nothing has changed but you still
+        # want to invalidate the cache
+        CACHE_NUMBER: 0
+      with:
+        path: |
+          /usr/share/miniconda/envs/
+          /usr/local/miniconda/envs/
+        key: fairseq-cpu-${{ matrix.platform }}-python${{ matrix.python-version }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('**/.github/workflows/cpu_tests.yml') }}-${{ hashFiles('**/setup.py') }}
+
+
+    - name: Install Dependencies  # pinned PyTorch 1.10 stack plus the extras the CPU tests import
+      shell: bash -l {0}
+      run: |
+        conda activate fairseq
+        git submodule update --init --recursive
+        pip install torch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 statsmodels==0.12.2 more_itertools submitit boto3 editdistance iopath ipdb ipython pyarrow pytest sacremoses sentencepiece subword-nmt transformers scikit-learn scipy fairscale Jinja2==2.11.3  # scikit-learn: the "sklearn" PyPI alias is deprecated and fails to install
+
+    - name: Install Repository
+      shell: bash -l {0}
+      run: |
+        conda activate fairseq
+        python setup.py clean --all
+        pip install --editable .
+        python setup.py build_ext --inplace
+
+
+    - name: Run CPU tests
+      shell: bash -l {0}
+      run: |
+        conda activate fairseq
+        cd tests
+        pytest --continue-on-collection-errors -v .
diff --git a/.github/workflows/build.yml → .github/workflows/lint.yml b/.github/workflows/build.yml → .github/workflows/lint.yml
@@ -1,4 +1,4 @@
-name: build
+name: lint_tests
 
 on:
   # Trigger the workflow on push to main or any pull request
@@ -11,10 +11,10 @@ jobs:
   build:
 
     strategy:
-      max-parallel: 4
+      max-parallel: 1
       matrix:
-        platform: [ubuntu-latest, macos-latest]
-        python-version: [3.8, 3.9]
+        platform: [ubuntu-latest]
+        python-version: [3.8]
 
     runs-on: ${{ matrix.platform }}
 
@@ -26,34 +26,20 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
 
-    - name: Conditionally install pytorch
-      if: matrix.platform == 'windows-latest'
-      run: pip3 install torch -f https://download.pytorch.org/whl/torch_stable.html
-
     - name: Install locally
       run: |
         python -m pip install --upgrade pip
         git submodule update --init --recursive
         python setup.py build_ext --inplace
-        python -m pip install --editable .
-
-    - name: Install optional test requirements
-      run: |
-        python -m pip install iopath transformers pyarrow
-        python -m pip install git+https://github.com/facebookresearch/fairscale.git@main
+        python -m pip install --editable '.[dev]'
 
     - name: Lint with flake8
       run: |
-        pip install flake8
         # stop the build if there are Python syntax errors or undefined names
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --extend-exclude fairseq/model_parallel/megatron
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --extend-exclude fairseq/model_parallel/megatron
 
-    - name: Run tests
-      run: |
-          python setup.py test
-
     - name: Lint with black
       run: |
         pip install black

diff --git a/.gitignore b/.gitignore
@@ -34,6 +34,9 @@ wheels/
 # Checkpoints
 checkpoints
 
+# slurm snapshot
+slurm_snapshot_code/
+
 # PyInstaller
 #  Usually these files are written by a python script from a template
 #  before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -117,6 +120,10 @@ ENV/
 
 # data
 data-bin/
+examples/nllb/data/non_train_datasets/
+examples/nllb/data/train_datasets/
+examples/nllb/data/eval_datasets/
+model_checkpoints/
 
 # reranking
 /examples/reranking/rerank_data
@@ -128,6 +135,7 @@ data-bin/
 # VSCODE
 .vscode/ftp-sync.json
 .vscode/settings.json
+.vscode/launch.json
 
 # Experimental Folder
 experimental/*
@@ -139,3 +147,5 @@ wandb/
 nohup.out
 multirun
 outputs
+
+# data
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,14 +13,15 @@ repos:
     -   id: no-commit-to-branch
         args: ['--branch=master']
     -   id: check-added-large-files
-        args: ['--maxkb=500']
+        args: ['--maxkb=2048']
     -   id: end-of-file-fixer
 
 -   repo: https://github.com/ambv/black
     rev: 22.1.0
     hooks:
     - id: black
       language_version: python3.8
+      additional_dependencies: ['click==8.0.4']
 
 -   repo: https://gitlab.com/pycqa/flake8
     rev: 3.9.2

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
@@ -23,13 +23,13 @@ include:
 Examples of unacceptable behavior by participants include:
 
 * The use of sexualized language or imagery and unwelcome sexual attention or
-  advances
+advances
 * Trolling, insulting/derogatory comments, and personal or political attacks
 * Public or private harassment
 * Publishing others' private information, such as a physical or electronic
-  address, without explicit permission
+address, without explicit permission
 * Other conduct which could reasonably be considered inappropriate in a
-  professional setting
+professional setting
 
 ## Our Responsibilities
 
@@ -52,10 +52,14 @@ project e-mail address, posting via an official social media account, or acting
 as an appointed representative at an online or offline event. Representation of
 a project may be further defined and clarified by project maintainers.
 
+This Code of Conduct also applies outside the project spaces when there is a
+reasonable belief that an individual's behavior may have a negative impact on
+the project or its community.
+
 ## Enforcement
 
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at <conduct@pytorch.org>. All
+reported by contacting the project team at <opensource-conduct@fb.com>. All
 complaints will be reviewed and investigated and will result in a response that
 is deemed necessary and appropriate to the circumstances. The project team is
 obligated to maintain confidentiality with regard to the reporter of an incident.
@@ -74,4 +78,3 @@ available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.ht
 
 For answers to common questions about this code of conduct, see
 https://www.contributor-covenant.org/faq
-
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq)
+# Contributing to fairseq
 We want to make contributing to this project as easy and transparent as
 possible.
 
@@ -14,26 +14,28 @@ We actively welcome your pull requests.
 
 ## Contributor License Agreement ("CLA")
 In order to accept your pull request, we need you to submit a CLA. You only need
-to do this once to work on any of Facebook's open source projects.
+to do this once to work on any of Meta's open source projects.
 
 Complete your CLA here: <https://code.facebook.com/cla>
 
 ## Issues
 We use GitHub issues to track public bugs. Please ensure your description is
 clear and has sufficient instructions to be able to reproduce the issue.
 
+Meta has a [bounty program](https://www.facebook.com/whitehat/) for the safe
+disclosure of security bugs. In those cases, please go through the process
+outlined on that page and do not file a public issue.
+
 ## License
-By contributing to Facebook AI Research Sequence-to-Sequence Toolkit (fairseq),
-you agree that your contributions will be licensed under the LICENSE file in
-the root directory of this source tree.
+By contributing to fairseq, you agree that your contributions will be licensed under the LICENSE file in the root directory of this source tree.
 
 ## Pre-commit hooks
 In order to ensure your code lints, there are pre-commit hooks configured in the repository which you can install.
 After installation, they will automatically run each time you commit.
 An abbreviated guide is given below; for more information, refer to [the official pre-commit documentation](https://pre-commit.com/).
 
 ### Installation
-```
+```bash
 pip install pre-commit
 pre-commit install
 ```