Skip to content

Commit

Permalink
Merge branch 'develop' into distconv-channelwise-softmax
Browse files Browse the repository at this point in the history
  • Loading branch information
bvanessen authored Jun 8, 2024
2 parents 8ded742 + dd8b60d commit d736a53
Show file tree
Hide file tree
Showing 1,698 changed files with 99,376 additions and 54,648 deletions.
3 changes: 3 additions & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,7 @@ StatementMacros:
TabWidth: 8
UseCRLF: false
UseTab: Never
---
Language: Proto
BasedOnStyle: Google
...
74 changes: 74 additions & 0 deletions .github/workflows/build-cpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# CPU-only CI for LBANN on a GitHub-hosted Ubuntu runner.
# The job installs system packages, restores (or builds and caches) the
# Spack-built dependencies, builds LBANN, and runs the Catch2 unit tests.
name: LBANN CPU

# Run for pushes, pull requests, and merge-queue groups targeting develop.
on:
  push:
    branches: develop
  pull_request:
    branches: develop
  merge_group:
    branches: develop

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        compiler: [gcc]  # , clang

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
          cache: 'pip'

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libyaml-dev cmake lmod ninja-build
          sudo apt-get install -y libblas-dev libopenblas-dev liblapacke-dev
          sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
          python -m pip install --upgrade pip

      # The Spack checkout and its user-level state are cached per OS and
      # compiler so the dependency build below can be skipped on a hit.
      - name: Restore cached Spack-built dependencies
        id: cache-spack
        uses: actions/cache/restore@v4
        with:
          path: |
            ~/.spack
            spack
          key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps

      # Only needed on a cache miss; a hit restores the `spack` directory
      # that the later "Build LBANN" step sources.
      - name: Build and install LBANN dependencies
        if: steps.cache-spack.outputs.cache-hit != 'true'
        run: |
          source /usr/share/lmod/lmod/init/bash
          git clone -c feature.manyFiles=true https://github.com/spack/spack.git
          cd spack
          git checkout 73858df14dc3f0e701814c84bb8bd6b72f80a806 # Use a tried and true version of Spack
          cd ..
          source spack/share/spack/setup-env.sh
          scripts/build_lbann.sh -d --dependencies-only -l ci -- +numpy +unit_tests %${{ matrix.compiler }}

      # Skip on an exact cache hit: the entry for this key already exists,
      # so re-archiving and re-uploading the same paths is wasted work.
      - name: Cache Spack-built dependencies
        id: cache-spack-save
        if: steps.cache-spack.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: |
            ~/.spack
            spack
          key: ${{ runner.os }}-${{ matrix.compiler }}-spackdeps

      - name: Build LBANN
        run: |
          source /usr/share/lmod/lmod/init/bash
          source spack/share/spack/setup-env.sh
          scripts/build_lbann.sh -r -l ci --ci -- +numpy +unit_tests %${{ matrix.compiler }}

      # The glob matches whatever build directory the build script created
      # under builds/.
      - name: Test Catch2
        run: |
          cd builds/*/build
          ./unit_test/helpers_tests
          ./unit_test/seq-catch-tests
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,12 @@ data.prototext*

# Can also ignore all directories and files in a directory.
# tmp/**/*
build
builds
spack_environments/users/


# we don't want to collect slurm output
**/slurm-*.out

# Ignore default lbann output experiment directory names
????????_??????_lbann*/
64 changes: 60 additions & 4 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,32 @@
# clusters. To run testing locally, consult the README in the ci_test
# directory.

variables:
FF_USE_NEW_BASH_EVAL_STRATEGY: 'true'
FF_ENABLE_BASH_EXIT_CODE_CHECK: 1
LBANN_CI_CLEAN_BUILD: 'true'

stages:
- run-all-clusters

catalyst testing:
corona testing:
stage: run-all-clusters
variables:
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
trigger:
strategy: depend
include: .gitlab/catalyst/pipeline.yml
include: .gitlab/corona/pipeline.yml

corona testing:
corona distconv testing:
stage: run-all-clusters
variables:
JOB_NAME_SUFFIX: _distconv
SPACK_ENV_BASE_NAME_MODIFIER: "-distconv"
SPACK_SPECS: "+rocm +distconv"
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
TEST_FLAG: "test_*_distconv.py"
trigger:
strategy: depend
include: .gitlab/corona/pipeline.yml
Expand All @@ -51,31 +62,76 @@ lassen testing:
stage: run-all-clusters
variables:
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
trigger:
strategy: depend
include: .gitlab/lassen/pipeline.yml

lassen distconv testing:
stage: run-all-clusters
variables:
JOB_NAME_SUFFIX: _distconv
SPACK_ENV_BASE_NAME_MODIFIER: "-multi-stage-distconv"
SPACK_SPECS: "+cuda +distconv +fft"
# SPACK_SPECS: "+cuda +distconv +nvshmem +fft"
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
TEST_FLAG: "test_*_distconv.py"
trigger:
strategy: depend
include: .gitlab/lassen/multi_stage_pipeline.yml

pascal testing:
stage: run-all-clusters
variables:
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
trigger:
strategy: depend
include: .gitlab/pascal/pipeline.yml

pascal compiler testing:
stage: run-all-clusters
variables:
SPACK_SPECS: "%gcc@8.3.1 +cuda +half +fft"
SPACK_SPECS: "%gcc@10.3.1 +cuda +half +fft"
BUILD_SCRIPT_OPTIONS: "--no-default-mirrors"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
trigger:
strategy: depend
include: .gitlab/pascal/pipeline_compiler_tests.yml

pascal distconv testing:
stage: run-all-clusters
variables:
JOB_NAME_SUFFIX: _distconv
SPACK_SPECS: "%gcc@10.3.1 +cuda +distconv +fft"
BUILD_SCRIPT_OPTIONS: "--no-default-mirrors"
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
TEST_FLAG: "test_*_distconv.py"
trigger:
strategy: depend
include: .gitlab/pascal/pipeline.yml

tioga testing:
stage: run-all-clusters
variables:
# FF_USE_NEW_BASH_EVAL_STRATEGY: 1
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
trigger:
strategy: depend
include: .gitlab/tioga/pipeline.yml

tioga distconv testing:
stage: run-all-clusters
variables:
JOB_NAME_SUFFIX: _distconv
SPACK_ENV_BASE_NAME_MODIFIER: "-distconv"
SPACK_SPECS: "+rocm +distconv"
WITH_WEEKLY: "${LBANN_CI_RUN_WEEKLY}"
WITH_CLEAN_BUILD: "${LBANN_CI_CLEAN_BUILD}"
TEST_FLAG: "test_*_distconv.py"
trigger:
strategy: depend
include: .gitlab/tioga/pipeline.yml
Loading

0 comments on commit d736a53

Please sign in to comment.