Skip to content

Add custom prepare_for_training logic to ECD model for LLM encoder adapter initialization #11867

Add custom prepare_for_training logic to ECD model for LLM encoder adapter initialization

Add custom prepare_for_training logic to ECD model for LLM encoder adapter initialization #11867

Workflow file for this run

# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: pytest
on:
push:
branches: ["master", "release-*"]
pull_request:
branches: ["master", "release-*"]
# We want an ongoing run of this workflow to be canceled by a later commit
# so that there is only one concurrent run of this workflow for each branch
concurrency:
group: pytest-${{ github.head_ref || github.sha }}
cancel-in-progress: true
jobs:
pytest:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8", "3.9", "3.10"]
test-markers: ["not distributed", "distributed"]
include:
- python-version: "3.8"
pytorch-version: 2.0.0
torchscript-version: 1.10.2
ray-version: 2.2.0
- python-version: "3.9"
pytorch-version: 2.1.1
torchscript-version: 1.10.2
ray-version: 2.3.1
- python-version: "3.10"
pytorch-version: nightly
torchscript-version: 1.10.2
ray-version: 2.3.1
env:
PYTORCH: ${{ matrix.pytorch-version }}
MARKERS: ${{ matrix.test-markers }}
NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
NEUROPOD_VERISON: "0.3.0-rc6"
TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
RAY_VERSION: ${{ matrix.ray-version }}
AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, ${{ matrix.test-markers }}, ${{ matrix.os }}, ray ${{ matrix.ray-version }}
services:
minio:
image: fclairamb/minio-github-actions
env:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
ports:
- 9000:9000
timeout-minutes: 150
steps:
- name: Setup ludwigai/ludwig-ray container for local testing with act.
if: ${{ env.ACT }}
run: |
curl -fsSL https://deb.nodesource.com/setup_16.x | sudo -E bash -
sudo apt-get install -y nodejs
sudo mkdir -p /opt/hostedtoolcache/
sudo chmod 777 -R /opt/hostedtoolcache/
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get update && sudo apt-get install -y cmake libsndfile1 wget libsox-dev
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv
- name: pip cache
if: ${{ !env.ACT }}
uses: actions/cache@v2
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ matrix.test-markers }}-${{ hashFiles('requirements*.txt', '.github/workflows/pytest.yml') }}
- name: Debug out of space
run: |
du -h -d 1 ~
df -h
- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip
cmake --version
# remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
cat requirements_distributed.txt | sed '/^ray[\[]/d'
if [ "$MARKERS" != "distributed" ]; then
# Skip distributed and hyperopt requirements to test optional imports
echo > requirements-temp && mv requirements-temp requirements_distributed.txt
echo > requirements-temp && mv requirements-temp requirements_hyperopt.txt
# Skip distributed tree requirement (lightgbm-ray)
cat requirements_tree.txt | sed '/^lightgbm-ray/d' > requirements-temp && mv requirements-temp requirements_tree.txt
else
if [ "$RAY_VERSION" == "nightly" ]; then
# NOTE: hardcoded for python 3.10 on Linux
echo "ray[default,data,serve,tune] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp310-cp310-manylinux2014_x86_64.whl" >> requirements_distributed.txt
else
echo "ray[default,data,serve,tune]==$RAY_VERSION" >> requirements_distributed.txt
fi
fi
if [ "$PYTORCH" == "nightly" ]; then
extra_index_url=https://download.pytorch.org/whl/nightly/cpu
# Temporary Fix - remove torchdata once https://github.com/pytorch/text/pull/2218 lands.
pip install torchdata --extra-index-url https://download.pytorch.org/whl/cpu
pip install --pre torch torchdata torchtext torchvision torchaudio --index-url $extra_index_url
else
extra_index_url=https://download.pytorch.org/whl/cpu
pip install torch==$PYTORCH torchtext torchvision torchaudio --extra-index-url $extra_index_url
fi
pip install '.[test]' --extra-index-url $extra_index_url
pip list
if [ "$PYTORCH" == "nightly" ]; then
python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release >= version.parse(\"2.0.0\").release, f\"torch {version.parse(torch.__version__).release} < version.parse(\'2.0.0\').release\""
else
python -c "from packaging import version; import torch; assert version.parse(torch.__version__).release == version.parse(\"$PYTORCH\").release, f\"torch {version.parse(torch.__version__).release} != version.parse(\'$PYTORCH\').release\""
fi
if [ "$MARKERS" == "distributed" ]; then
python -c "from packaging import version; import ray; assert version.parse(ray.__version__).release == version.parse(\"$RAY_VERSION\").release, f\"ray {version.parse(ray.__version__).release} != version.parse(\'$RAY_VERSION\').release\""
else
python -c "import importlib.util; assert importlib.util.find_spec('ray') is None, \"found ray but expected it to not be installed\""
fi
shell: bash
- name: Install Neuropod backend
run: |
sudo mkdir -p "$NEUROPOD_BASE_DIR"
curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
shell: bash
- name: Unit Tests
run: |
RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod and not llm" --junitxml pytest.xml tests/ludwig
- name: Regression Tests
run: |
RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and not slow and not combinatorial and not horovod or benchmark and not llm" --junitxml pytest.xml tests/regression_tests
# Skip Horovod and replace with DDP.
# https://github.com/ludwig-ai/ludwig/issues/3468
# - name: Install Horovod if necessary
# if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
# env:
# HOROVOD_WITH_PYTORCH: 1
# HOROVOD_WITHOUT_MPI: 1
# HOROVOD_WITHOUT_TENSORFLOW: 1
# HOROVOD_WITHOUT_MXNET: 1
# run: |
# pip install -r requirements_extra.txt
# HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
# if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
# pip uninstall -y horovod
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# fi
# horovodrun --check-build
# shell: bash
# Skip Horovod tests and replace with DDP.
# https://github.com/ludwig-ai/ludwig/issues/3468
# - name: Horovod Tests
# if: matrix.test-markers == 'distributed' && matrix.pytorch-version != 'nightly'
# run: |
# RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=5400 pytest -v --timeout 300 --durations 100 -m "$MARKERS and horovod and not slow and not combinatorial and not llm" --junitxml pytest.xml tests/
- name: Upload Unit Test Results
if: ${{ always() && !env.ACT }}
uses: actions/upload-artifact@v2
with:
name: Unit Test Results (Python ${{ matrix.python-version }} ${{ matrix.test-markers }})
path: pytest.xml
integration-tests:
name: ${{ matrix.test-markers }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
test-markers:
- "integration_tests_a"
- "integration_tests_b"
- "integration_tests_c"
- "integration_tests_d"
- "integration_tests_e"
- "integration_tests_f"
env:
AWS_ACCESS_KEY_ID: ${{ secrets.LUDWIG_TESTS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.LUDWIG_TESTS_AWS_SECRET_ACCESS_KEY }}
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
IS_NOT_FORK: ${{ !(github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && github.event.pull_request.head.repo.fork) }}
MARKERS: ${{ matrix.test-markers }}
services:
minio:
image: fclairamb/minio-github-actions
env:
MINIO_ACCESS_KEY: minio
MINIO_SECRET_KEY: minio123
ports:
- 9000:9000
timeout-minutes: 90
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get update && sudo apt-get install -y cmake libsndfile1
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv
- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip
# remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
cat requirements_distributed.txt | sed '/^ray[\[]/d'
pip install torch==2.0.0 torchtext torchvision torchaudio
pip install ray==2.3.0
pip install '.[test]'
pip list
shell: bash
- name: Integration Tests
run: |
RUN_PRIVATE=$IS_NOT_FORK LUDWIG_TEST_SUITE_TIMEOUT_S=7200 pytest -v --timeout 300 --durations 100 -m "not slow and not combinatorial and not horovod and not llm and $MARKERS" --junitxml pytest.xml tests/integration_tests
llm-tests:
name: LLM Tests
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get update && sudo apt-get install -y cmake libsndfile1
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv
- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip
# remove torch and ray from the dependencies so we can add them depending on the matrix args for the job.
cat requirements.txt | sed '/^torch[>=<\b]/d' | sed '/^torchtext/d' | sed '/^torchvision/d' | sed '/^torchaudio/d' > requirements-temp && mv requirements-temp requirements.txt
cat requirements_distributed.txt | sed '/^ray[\[]/d'
pip install torch==2.0.0 torchtext torchvision torchaudio
pip install ray==2.3.0
pip install '.[test]'
pip list
shell: bash
- name: LLM Tests
run: |
pytest -vs --durations 100 -m "llm" --junitxml pytest.xml tests
combinatorial-tests:
name: Combinatorial Tests
runs-on: ubuntu-latest
timeout-minutes: 60
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get update && sudo apt-get install -y cmake libsndfile1
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv
- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip
pip install '.[test]'
pip list
shell: bash
- name: Testing combinatorial config generation code
run: |
pytest -vs --durations 100 -m "combinatorial" --junitxml pytest.xml tests/ludwig/config_sampling
- name: Combinatorial Tests
run: |
pytest -rx --durations 100 -m "combinatorial" --junitxml pytest.xml tests/training_success
test-minimal-install:
name: Test Minimal Install
runs-on: ubuntu-latest
timeout-minutes: 15
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Setup Linux
if: runner.os == 'linux'
run: |
sudo apt-get update && sudo apt-get install -y cmake libsndfile1
- name: Setup macOS
if: runner.os == 'macOS'
run: |
brew install libuv
- name: Install dependencies
run: |
python --version
pip --version
python -m pip install -U pip
pip install torch==2.0.0 torchtext
pip install ray==2.3.0
pip install '.'
pip list
shell: bash
- name: Check Install
run: |
ludwig check_install
shell: bash
- name: Test Getting Started
run: |
cd examples/getting_started && sh ./run.sh
shell: bash
# start-runner:
# name: Start self-hosted EC2 runner
# if: >
# always() && needs.pytest.result != 'failure' && (
# github.event_name == 'schedule' && github.repository == 'ludwig-ai/ludwig' ||
# github.event_name == 'push' && github.repository == 'ludwig-ai/ludwig' ||
# github.event_name == 'pull_request' && github.event.pull_request.base.repo.full_name == 'ludwig-ai/ludwig' && !github.event.pull_request.head.repo.fork)
# needs: pytest
# runs-on: ubuntu-latest
# outputs:
# label: ${{ steps.start-ec2-runner.outputs.label }}
# ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
# steps:
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v1
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: ${{ secrets.AWS_REGION }}
# - name: Start EC2 runner
# id: start-ec2-runner
# uses: machulav/[email protected]
# with:
# mode: start
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
# ec2-image-id: ami-0759580dedc953d1f
# ec2-instance-type: g4dn.xlarge
# subnet-id: subnet-0983be43
# security-group-id: sg-4cba0d08
# aws-resource-tags: >
# [
# {"Key": "Name", "Value": "ludwig-github-${{ github.head_ref || github.sha }}"},
# {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
# {"Key": "GitHubHeadRef", "Value": "${{ github.head_ref }}"},
# {"Key": "GitHubSHA", "Value": "${{ github.sha }}"}
# ]
# pytest-gpu:
# if: needs.start-runner.result != 'skipped'
# needs: start-runner # required to start the main job when the runner is ready
# runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runners
# strategy:
# fail-fast: false
# matrix:
# python-version: [3.7]
# include:
# - python-version: 3.7
# pytorch-version: 1.10.0
# torchscript-version: 1.10.2
# env:
# PYTORCH: ${{ matrix.pytorch-version }}
# NEUROPOD_BASE_DIR: "/usr/local/lib/neuropod"
# NEUROPOD_VERISON: "0.3.0-rc6"
# TORCHSCRIPT_VERISON: ${{ matrix.torchscript-version }}
# name: py${{ matrix.python-version }}, torch-${{ matrix.pytorch-version }}, gpu
# timeout-minutes: 70
# steps:
# - uses: actions/checkout@v2
# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2
# with:
# python-version: ${{ matrix.python-version }}
# - name: Setup Linux
# if: runner.os == 'linux'
# run: |
# sudo apt-get update && sudo apt-get install -y libsndfile1 cmake ccache build-essential g++-8 gcc-8
# cmake --version
# - name: Install CUDA drivers
# run: |
# wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
# sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
# wget https://developer.download.nvidia.com/compute/cuda/11.5.1/local_installers/cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
# sudo dpkg -i cuda-repo-ubuntu2004-11-5-local_11.5.1-495.29.05-1_amd64.deb
# sudo apt-key add /var/cuda-repo-ubuntu2004-11-5-local/7fa2af80.pub
# sudo apt-get update
# sudo apt-get -y install cuda
# shell: bash
# - name: pip cache
# uses: actions/cache@v2
# with:
# path: ~/.cache/pip
# key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-${{ hashFiles('requirements*.txt') }}
# restore-keys: |
# ${{ runner.os }}-pip-py${{ matrix.python-version }}-torch${{ matrix.pytorch-version }}-
# - name: Install dependencies
# env:
# HOROVOD_WITH_PYTORCH: 1
# HOROVOD_WITHOUT_MPI: 1
# HOROVOD_WITHOUT_TENSORFLOW: 1
# HOROVOD_WITHOUT_MXNET: 1
# run: |
# python --version
# pip --version
# python -m pip install -U pip
# if [ $PYTORCH == "nightly" ]; then
# cat requirements.txt | sed '/^torch[>=<]/d' > requirements-temp && mv requirements-temp requirements.txt
# pip install --pre torch torchvision -f https://download.pytorch.org/whl/torch_stable.html
# else
# pip install torch==${PYTORCH}+cu111 -f https://download.pytorch.org/whl/torch_stable.html
# fi
# # pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# pip install dulwich==0.20.26 # workaround for `/usr/bin/ld: cannot find -lpython3.7m`
# pip install '.[test]'
# pip list
# shell: bash
# - name: Install Neuropod backend
# run: |
# sudo mkdir -p "$NEUROPOD_BASE_DIR"
# curl -L https://github.com/uber/neuropod/releases/download/v${{ env.NEUROPOD_VERISON }}/libneuropod-cpu-linux-v${{ env.NEUROPOD_VERISON }}-torchscript-${{ env.TORCHSCRIPT_VERISON }}-backend.tar.gz | sudo tar -xz -C "$NEUROPOD_BASE_DIR"
# shell: bash
# - name: Reinstall Horovod if necessary
# env:
# HOROVOD_WITH_PYTORCH: 1
# HOROVOD_WITHOUT_MPI: 1
# HOROVOD_WITHOUT_TENSORFLOW: 1
# HOROVOD_WITHOUT_MXNET: 1
# run: |
# HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
# if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
# pip uninstall -y horovod
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@master
# fi
# horovodrun --check-build
# shell: bash
# - name: Check CUDA is available
# run: |
# python -c "import torch; assert torch.cuda.is_available()"
# - name: Tests
# run: |
# pytest -v --timeout 300 --durations 10 --junitxml pytest.xml tests
# - name: Upload Unit Test Results
# if: always()
# uses: actions/upload-artifact@v2
# with:
# name: Unit Test Results (Python ${{ matrix.python-version }} gpu
# path: pytest.xml
event_file:
name: "Event File"
runs-on: ubuntu-latest
steps:
- name: Upload
if: ${{ !env.ACT }}
uses: actions/upload-artifact@v2
with:
name: Event File
path: ${{ github.event_path }}
# stop-runner:
# name: Stop self-hosted EC2 runner
# # required to stop the runner even if the error happened in the previous job
# if: always() && needs.start-runner.result != 'skipped'
# needs:
# - start-runner # required to get output from the start-runner job
# - pytest-gpu # required to wait when the main job is done
# runs-on: ubuntu-latest
# steps:
# - name: Configure AWS credentials
# uses: aws-actions/configure-aws-credentials@v1
# with:
# aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# aws-region: ${{ secrets.AWS_REGION }}
# - name: Stop EC2 runner
# uses: machulav/[email protected]
# with:
# mode: stop
# github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
# label: ${{ needs.start-runner.outputs.label }}
# ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}