Commit 8a76eea

Merge pull request #100 from legend-exp/ci

CI improvements

2 parents: 9d7303a + 8961beb

17 files changed: +239, -64 lines

.github/workflows/main.yml (+2, -15)

@@ -68,21 +68,8 @@ jobs:
           token: ${{ secrets.CLONE_LEGEND_METADATA }}
           path: ${{ env.LEGEND_METADATA }}
 
-      - name: Get dependencies and install legend-dataflow
-        run: |
-          python -m pip install --upgrade uv
-          python -m uv pip install --upgrade .[runprod]
-
-      - name: Set the PRODENV variable
-        run: |
-          echo "PRODENV=$(realpath $GITHUB_WORKSPACE/..)" >> $GITHUB_ENV
-
-      - name: run workflows in dry-run mode
-        run: |
-          snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-daq.gen
-          snakemake --workflow-profile workflow/profiles/lngs-build-raw -n all-*-raw.gen
-          snakemake --workflow-profile workflow/profiles/lngs -n all-*-evt.gen
-          snakemake --workflow-profile workflow/profiles/lngs -n all-*-skm.gen
+      - name: Run data production tests
+        run: ./tests/runprod/run-all.sh
 
   test-coverage:
     name: Calculate and upload test coverage
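The replaced dry-run steps are now encapsulated in a single entry point, so the same checks can be reproduced outside of CI. A minimal sketch, assuming a local legend-dataflow checkout and access to the legend-metadata repository (which CI clones using a token):

    # sketch: reproduce the CI step locally, from the repository root;
    # run-all.sh installs the package into a test environment and then
    # executes every tests/runprod/test-*.sh script
    cd legend-dataflow
    ./tests/runprod/run-all.sh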

codecov.yml (+17)

@@ -0,0 +1,17 @@
+codecov:
+  require_ci_to_pass: true
+
+coverage:
+  status:
+    project:
+      default:
+        enabled: no
+    patch:
+      default:
+        enabled: no
+    changes:
+      default:
+        enabled: no
+
+github_checks:
+  annotations: false

pyproject.toml (+2, -2)

@@ -51,10 +51,10 @@ dynamic = ["version"]
 
 dependencies = [
     "colorlog",
-    "dbetto>=1.2.0",
+    "dbetto>=1.2",
     "pygama>=2",
     "dspeed>=1.6",
-    "pylegendmeta==1.2.0a2",
+    "pylegendmeta>=1.2",
     "legend-pydataobj>=1.11.6",
     "legend-daq2lh5>=1.4",
     "pip",

tests/runprod/conftest.sh (+30)

@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be *sourced* from the legend-dataflow directory
+
+_prod_cycle="$(realpath .)"
+
+function get_dataflow_config_value() {
+    python -c "import dbetto; print(dbetto.AttrsDict(dbetto.utils.load_dict('${_prod_cycle}/dataflow-config.yaml')).${1})" \
+        | sed "s|\$_|${_prod_cycle}|g"
+}
+
+run_test_command() {
+    echo "::notice::$*"
+
+    output=$("$@" 2>&1)
+    status=$?
+
+    if [ $status -ne 0 ]; then
+        echo "::error::command failed with status $status"
+        echo "$output"
+    fi
+
+    return $status
+}
+
+
+export -f get_dataflow_config_value run_test_command
+
+PRODENV="$(realpath ..)"
+export PRODENV
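For illustration, a hedged sketch of how these helpers combine once sourced: get_dataflow_config_value resolves a dotted key from dataflow-config.yaml via dbetto and expands the $_ placeholder to the production-cycle directory, while run_test_command wraps a command in GitHub Actions annotations. The paths.tier_raw key is taken from the test scripts below; the expanded value is illustrative only.

    # must be *sourced* from the legend-dataflow directory
    source tests/runprod/conftest.sh

    # e.g. a config value "$_/generated/tier/raw" expands to
    # "<prod-cycle>/generated/tier/raw"
    rawdir="$(get_dataflow_config_value paths.tier_raw)"

    # echoes the command as a ::notice::; on failure, emits an ::error::
    # annotation plus the captured output, and returns the exit status
    run_test_command ls "${rawdir}"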

tests/runprod/install.sh (+15)

@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+echo "::group::setting up test environment"
+
+PRODENV="$(realpath ..)"
+export PRODENV
+
+python -m pip --quiet install --upgrade pip wheel setuptools
+python -m pip --quiet install --upgrade '.[runprod]'
+
+dataprod -v install --remove --system bare -- dataflow-config.yaml
+
+echo "::endgroup::"

tests/runprod/run-all.sh (+11)

@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+./tests/runprod/install.sh
+
+for test in tests/runprod/test-*.sh; do
+    echo "::group::test $test"
+    ./"$test" || exit 1
+    echo "::endgroup::"
+done
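Because every test script sources conftest.sh itself, individual tests can presumably also be run in isolation after the one-time setup:

    # from the legend-dataflow directory
    ./tests/runprod/install.sh    # set up the test environment once
    ./tests/runprod/test-raw.sh   # then run a single test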

tests/runprod/test-evt.sh (+50)

@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+# shellcheck disable=SC1091
+source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"
+
+rawdir="$(get_dataflow_config_value paths.tier_raw)"
+mkdir -p "${rawdir}" || exit 1
+
+function mkdir_n_touch() {
+    mkdir -p "$(dirname "${1}")" || return 1
+    touch "${1}" || return 1
+}
+
+rawfiles=(
+    phy/p04/r001/l200-p04-r001-phy-20230421T174901Z-tier_raw.lh5
+    phy/p04/r000/l200-p04-r000-phy-20230415T033517Z-tier_raw.lh5
+    phy/p03/r001/l200-p03-r001-phy-20230318T015140Z-tier_raw.lh5
+    phy/p03/r000/l200-p03-r000-phy-20230312T043356Z-tier_raw.lh5
+    phy/p03/r002/l200-p03-r002-phy-20230324T205907Z-tier_raw.lh5
+    cal/p04/r001/l200-p04-r001-cal-20230421T131817Z-tier_raw.lh5
+    cal/p04/r000/l200-p04-r000-cal-20230414T215158Z-tier_raw.lh5
+    cal/p03/r001/l200-p03-r001-cal-20230317T211819Z-tier_raw.lh5
+    cal/p03/r000/l200-p03-r000-cal-20230311T235840Z-tier_raw.lh5
+    cal/p03/r002/l200-p03-r002-cal-20230324T161401Z-tier_raw.lh5
+    anp/p13/r002/l200-p13-r002-anp-20241217T094846Z-tier_raw.lh5
+    anc/p13/r006/l200-p13-r006-anc-20241221T150249Z-tier_raw.lh5
+    acs/p13/r006/l200-p13-r006-acs-20241221T150307Z-tier_raw.lh5
+)
+
+(
+    cd "${rawdir}" || exit 1
+    for file in "${rawfiles[@]}"; do
+        mkdir_n_touch "$file"
+    done
+)
+
+inputs="$(get_dataflow_config_value paths.metadata)"
+
+# FIXME: remove these at some point
+touch "$inputs/dataprod/overrides/dsp/cal/p03/r000/l200-p03-r000-cal-20230311T235840Z-par_dsp_svm_train.lh5"
+touch "$inputs/dataprod/overrides/dsp/cal/p04/r000/l200-p04-r000-cal-20230414T215158Z-par_dsp_svm_train.lh5"
+
+_smk_opts=(
+    --touch
+    --workflow-profile workflow/profiles/default
+)
+
+run_test_command snakemake "${_smk_opts[@]}" "all-*-evt.gen" || exit 1

tests/runprod/test-raw.sh (+43)

@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# IMPORTANT: this script must be executed from the legend-dataflow directory
+
+# shellcheck disable=SC1091
+source "$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)/conftest.sh"
+
+sandbox=$(get_dataflow_config_value paths.sandbox_path)
+mkdir -p "${sandbox}"
+
+(
+    cd "${sandbox}" || exit 1
+    touch \
+        l200-p03-r000-cal-20230311T235840Z.orca \
+        l200-p03-r001-cal-20230317T211819Z.orca \
+        l200-p03-r002-cal-20230324T161401Z.orca \
+        l200-p04-r000-cal-20230414T215158Z.orca \
+        l200-p04-r001-cal-20230421T131817Z.orca \
+        l200-p03-r000-phy-20230312T043356Z.orca \
+        l200-p03-r001-phy-20230318T015140Z.orca \
+        l200-p03-r002-phy-20230324T205907Z.orca \
+        l200-p04-r000-phy-20230415T033517Z.orca \
+        l200-p04-r001-phy-20230421T174901Z.orca \
+        l200-p13-r006-acs-20241221T150307Z.fcio \
+        l200-p13-r006-anc-20241221T150249Z.fcio \
+        l200-p13-r002-anp-20241217T094846Z.fcio
+)
+
+# FIXME: --touch does not do what I thought. need to add this functionality to
+# the future plugin
+_smk_opts=(
+    --forcerun
+    --touch
+    --config system=bare
+    --cores all
+    --workflow-profile workflow/profiles/lngs-build-raw
+)
+
+for tier in daq raw; do
+    run_test_command snakemake "${_smk_opts[@]}" "all-*-${tier}.gen" || exit 1
+done
+
+rm -rf "${sandbox}"

workflow/Snakefile (+4, -21)

@@ -80,10 +80,11 @@ localrules:
 onstart:
     print("INFO: starting workflow")
     # Make sure some packages are initialized before we begin to avoid race conditions
-    for pkg in ["dspeed", "lgdo", "matplotlib"]:
-        shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")
+    if not workflow.touch:
+        for pkg in ["dspeed", "lgdo", "matplotlib"]:
+            shell(execenv.execenv_pyexe(config, "python") + "-c 'import " + pkg + "'")
 
-    # Log parameter catalogs in validity files
+    # Log parameter catalogs in validity files
     hit_par_cat_file = Path(utils.pars_path(config)) / "hit" / "validity.yaml"
     if hit_par_cat_file.is_file():
         hit_par_cat_file.unlink()
@@ -122,29 +123,11 @@ onstart:
 
 
 onsuccess:
-    from snakemake.report import auto_report
-
-
-    rep_dir = f"{log_path(config)}/report-{datetime.strftime(datetime.utcnow(), '%Y%m%dT%H%M%SZ')}"
-    Path(rep_dir).mkdir(parents=True, exist_ok=True)
-    # auto_report(workflow.persistence.dag, f"{rep_dir}/report.html")
-    auto_report(workflow.persistence.dag, report_plugin, report_settings)
-
-    with open(os.path.join(rep_dir, "dag.txt"), "w") as f:
-        f.writelines(str(workflow.persistence.dag))
-    # shell(f"cat {rep_dir}/dag.txt | dot -Tpdf > {rep_dir}/dag.pdf")
-
-    with open(f"{rep_dir}/rg.txt", "w") as f:
-        f.writelines(str(workflow.persistence.dag.rule_dot()))
-    # shell(f"cat {rep_dir}/rg.txt | dot -Tpdf > {rep_dir}/rg.pdf")
-
-    # remove .gen files
     files = glob.glob("*.gen")
     for file in files:
         if os.path.isfile(file):
             os.remove(file)
 
-    # remove filelists
     files = glob.glob(os.path.join(utils.filelist_path(config), "*"))
     for file in files:
         if os.path.isfile(file):

workflow/Snakefile-build-raw (+17, -6)

@@ -54,7 +54,10 @@ onstart:
     print("INFO: initializing workflow")
 
     # Make sure some packages are initialized before we send jobs to avoid race conditions
-    shell(execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'")
+    if not workflow.touch:
+        shell(
+            execenv.execenv_pyexe(config, "python") + " -c 'import daq2lh5, matplotlib'"
+        )
 
     raw_par_cat_file = Path(utils.pars_path(config)) / "raw" / "validity.yaml"
     if raw_par_cat_file.is_file():
@@ -87,16 +90,24 @@ rule gen_filelist:
 
 
 rule sort_data:
-    """
-    This rules moves the daq data from the unsorted sandbox dir
-    to the sorted dirs under generated
+    """Move DAQ data from sandbox to organized folder.
+
+    This rules moves the DAQ data from the unsorted sandbox directory to the
+    correct location in the `tier_raw` folder.
     """
     input:
-        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
+        patt.get_pattern_tier_daq_unsorted(config),
     output:
-        patt.get_pattern_tier_daq(config, extension="fcio"),
+        patt.get_pattern_tier_daq(config),
     shell:
         "mv {input} {output}"
 
 
+use rule sort_data as sort_data_fcio with:
+    input:
+        patt.get_pattern_tier_daq_unsorted(config, extension="fcio"),
+    output:
+        patt.get_pattern_tier_daq(config, extension="fcio"),
+
+
 # vim: filetype=snakemake

workflow/rules/ann.smk (+6, -2)

@@ -15,7 +15,9 @@ from legenddataflow.execenv import execenv_pyexe
 rule build_ann:
     input:
         dsp_file=get_pattern_tier(config, "dsp", check_in_cycle=False),
-        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+        pars_file=lambda wildcards: get_input_par_file(
+            setup=config, wildcards=wildcards, tier="ann", name="cuts"
+        ),
     params:
         timestamp="{timestamp}",
         datatype="{datatype}",
@@ -45,7 +47,9 @@ rule build_ann:
 rule build_pan:
     input:
         dsp_file=get_pattern_tier(config, "psp", check_in_cycle=False),
-        pars_file=lambda wildcards: get_input_par_file(wildcards, "ann", "cuts"),
+        pars_file=lambda wildcards: get_input_par_file(
+            setup=config, wildcards=wildcards, tier="ann", name="cuts"
+        ),
     params:
         timestamp="{timestamp}",
         datatype="{datatype}",

workflow/rules/common.smk (+7, -2)

@@ -66,16 +66,21 @@ def set_last_rule_name(workflow, new_name):
     workflow.check_localrules()
 
 
-def get_input_par_file(wildcards, tier, name):
+def get_input_par_file(setup, wildcards, tier, name):
+    allow_none = setup.get("allow_none", False)
     par_overwrite_file = Path(patt.par_overwrite_path(config)) / tier / "validity.yaml"
     pars_files_overwrite = Catalog.get_files(
         par_overwrite_file,
         wildcards.timestamp,
+        category=wildcards.datatype if hasattr(wildcards, "datatype") else "all",
     )
     for pars_file in pars_files_overwrite:
         if name in str(pars_file):
             return Path(patt.par_overwrite_path(config)) / tier / pars_file
-    raise ValueError(f"Could not find model in {pars_files_overwrite}")
+    if allow_none or (wildcards.datatype != "phy"):
+        return []
+    else:
+        raise ValueError(f"Could not find model in {pars_files_overwrite}")
 
 
 def get_overwrite_file(tier, wildcards=None, timestamp=None, name=None):
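Since every call site passes the workflow config as setup, the new allow_none fallback can presumably be toggled per invocation through Snakemake's --config flag; a sketch under that assumption:

    # assumption: with allow_none set, a missing parameter-override file
    # resolves to an empty input list instead of raising ValueError
    snakemake --config allow_none=True -n "all-*-evt.gen"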

workflow/rules/dsp_pars_geds.smk (+4, -2)

@@ -214,10 +214,12 @@ rule build_pars_dsp_eopt_geds:
 rule build_svm_dsp_geds:
     input:
         hyperpars=lambda wildcards: get_input_par_file(
-            wildcards, "dsp", "svm_hyperpars"
+            setup=config, wildcards=wildcards, tier="dsp", name="svm_hyperpars"
         ),
         train_data=lambda wildcards: str(
-            get_input_par_file(wildcards, "dsp", "svm_hyperpars")
+            get_input_par_file(
+                setup=config, wildcards=wildcards, tier="dsp", name="svm_hyperpars"
+            )
         ).replace("hyperpars.yaml", "train.lh5"),
     params:
         timestamp="{timestamp}",

workflow/rules/evt.smk (+2, -2)

@@ -26,7 +26,7 @@ rule build_evt:
             config, wildcards.timestamp, "hit"
         ),
         xtalk_matrix=lambda wildcards: get_input_par_file(
-            tier="evt", wildcards=wildcards, name="xtc"
+            setup=config, tier="evt", wildcards=wildcards, name="xtc"
         ),
     output:
         get_pattern_tier(config, "evt", check_in_cycle=check_in_cycle),
@@ -77,7 +77,7 @@ rule build_pet:
             config, wildcards.timestamp, "pht"
         ),
         xtalk_matrix=lambda wildcards: get_input_par_file(
-            tier="pet", wildcards=wildcards, name="xtc"
+            setup=config, tier="pet", wildcards=wildcards, name="xtc"
         ),
     output:
         get_pattern_tier(config, "pet", check_in_cycle=check_in_cycle),

workflow/rules/psp_pars_geds.smk (+4, -2)

@@ -167,10 +167,12 @@ workflow._ruleorder.add(*rule_order_list) # [::-1]
 rule build_svm_psp:
     input:
         hyperpars=lambda wildcards: get_input_par_file(
-            wildcards, "psp", "svm_hyperpars"
+            setup=config, wildcards=wildcards, tier="psp", name="svm_hyperpars"
         ),
         train_data=lambda wildcards: str(
-            get_input_par_file(wildcards, "psp", "svm_hyperpars")
+            get_input_par_file(
+                setup=config, wildcards=wildcards, tier="psp", name="svm_hyperpars"
+            )
         ).replace("hyperpars.yaml", "train.lh5"),
     output:
         dsp_pars=get_pattern_pars(config, "psp", "svm", "pkl"),
