diff --git a/GRID/utils/grid_submit.sh b/GRID/utils/grid_submit.sh
index 7dddd5e80..b3d6372d2 100755
--- a/GRID/utils/grid_submit.sh
+++ b/GRID/utils/grid_submit.sh
@@ -216,6 +216,7 @@ while [ $# -gt 0 ] ; do
     --prodsplit) PRODSPLIT=$2; shift 2 ;; # allows to set JDL production split level (useful to easily replicate workflows)
     --singularity) SINGULARITY=ON; shift 1 ;; # run everything inside singularity
     --wait) WAITFORALIEN=ON; shift 1 ;; #wait for alien jobs to finish
+    --wait-any) WAITFORALIENANY=ON; WAITFORALIEN=ON; shift 1 ;; #wait for any good==done alien jobs to return
     --outputspec) OUTPUTSPEC=$2; shift 2 ;; #provide comma separate list of JDL file specs to be put as part of JDL Output field (example '"*.log@disk=1","*.root@disk=2"')
     -h) Usage ; exit ;;
     --help) Usage ; exit ;;
@@ -227,6 +228,7 @@ export JOBTTL
 export JOBLABEL
 export MATTERMOSTHOOK
 export CONTROLSERVER
+[[ $PRODSPLIT -gt 100 ]] && echo "Production split must not be larger than 100 for the moment" && exit 1
 
 # check for presence of jq (needed in code path to fetch output files)
 
@@ -270,9 +272,10 @@ pok "Set the job name by running $0 "
 # Generate local workdir
 #
 if [[ "${ONGRID}" == "0" ]]; then
-  WORKDIR=${WORKDIR:-/tmp/alien_work/$(basename "$MY_JOBWORKDIR")}
-  [ ! -d "${WORKDIR}" ] && mkdir -p ${WORKDIR}
-  [ ! "${CONTINUE_WORKDIR}" ] && cp "${MY_JOBSCRIPT}" "${WORKDIR}/alien_jobscript.sh"
+  GRID_SUBMIT_WORKDIR=${GRID_SUBMIT_WORKDIR:-/tmp/alien_work/$(basename "$MY_JOBWORKDIR")}
+  echo "WORKDIR FOR THIS JOB IS ${GRID_SUBMIT_WORKDIR}"
+  [ ! -d "${GRID_SUBMIT_WORKDIR}" ] && mkdir -p ${GRID_SUBMIT_WORKDIR}
+  [ ! "${CONTINUE_WORKDIR}" ] && cp "${MY_JOBSCRIPT}" "${GRID_SUBMIT_WORKDIR}/alien_jobscript.sh"
 fi
 
 #
@@ -349,7 +352,7 @@ if [[ "${IS_ALIEN_JOB_SUBMITTER}" ]]; then
   cd "$(dirname "$0")"
   THIS_SCRIPT="$PWD/$(basename "$0")"
 
-  cd "${WORKDIR}"
+  cd "${GRID_SUBMIT_WORKDIR}"
 
   QUOT='"'
   # ---- Generate JDL ----------------
@@ -436,11 +439,18 @@ EOF
       continue
     fi
 
     let counter=0 # reset counter
-    JOBSTATUS=$(alien.py ps -j ${MY_JOBID} | awk '//{print $3}')
-    # echo -ne "Waiting for jobs to return; Last status ${JOBSTATUS}"
+
+    # this is the global job status (a D here means the production is done)
+    JOBSTATUS=$(alien.py ps -j ${MY_JOBID} | awk '//{print $3}')
+    # in addition we may query individual splits
+    if [ "${WAITFORALIENANY}" ]; then
+      if ALIENPY_JSON=true alien.py ps -a -m "${MY_JOBID}" | grep "status" | grep -q "DONE"; then
+        JOBSTATUS="D" # a D here means == some job finished successfully
+      fi
+    fi
     if [ "${JOBSTATUS}" == "D" ]; then
-      echo "Job done"
+      echo "${WAITFORALIENANY:+At least one }Job(s) done"
       WAITFORALIEN="" # guarantees to go out of outer while loop
 
       if [ "${FETCHOUTPUT}" ]; then
@@ -473,10 +483,6 @@ EOF
         done
       fi
     fi
-    if [[ "${FOO:0:1}" == [EK] ]]; then
-      echo "Job error occured"
-      exit 1
-    fi
   done
 
   # get the job data products locally if requested
@@ -490,7 +496,7 @@ if [[ ${SINGULARITY} ]]; then
   # if singularity was asked we restart this script within a container
   # it's actually much like the GRID mode --> which is why we set JALIEN_TOKEN_CERT
   set -x
-  cp $0 ${WORKDIR}
+  cp $0 ${GRID_SUBMIT_WORKDIR}
 
   # detect architecture (ARM or X86)
   ARCH=$(uname -i)
@@ -508,15 +514,15 @@ if [[ ${SINGULARITY} ]]; then
   APPTAINER_EXEC="/cvmfs/alice.cern.ch/containers/bin/apptainer/${ARCH}/current/bin/apptainer"
 
   # we can actually analyse the local JDL to find the package and set it up for the container
-  ${APPTAINER_EXEC} exec -C -B /cvmfs:/cvmfs,${WORKDIR}:/workdir --pwd /workdir -C ${CONTAINER} /workdir/grid_submit.sh \
+  ${APPTAINER_EXEC} exec -C -B /cvmfs:/cvmfs,${GRID_SUBMIT_WORKDIR}:/workdir --pwd /workdir -C ${CONTAINER} /workdir/grid_submit.sh \
   ${CONTINUE_WORKDIR:+"-c ${CONTINUE_WORKDIR}"} --local ${O2TAG:+--o2tag ${O2TAG}} --ttl ${JOBTTL} --label ${JOBLABEL:-label} ${MATTERMOSTHOOK:+--mattermost ${MATTERMOSTHOOK}} ${CONTROLSERVER:+--controlserver ${CONTROLSERVER}}
   set +x
   exit $?
 fi
 
 if [[ "${ONGRID}" == 0 ]]; then
-  banner "Executing job in directory ${WORKDIR}"
-  cd "${WORKDIR}" 2> /dev/null
+  banner "Executing job in directory ${GRID_SUBMIT_WORKDIR}"
+  cd "${GRID_SUBMIT_WORKDIR}" 2> /dev/null
 fi
 
 exec &> >(tee -a alien_log_${ALIEN_PROC_ID:-0}.txt)
diff --git a/MC/run/ANCHOR/tests/test_anchor_2tag_template.sh b/MC/run/ANCHOR/tests/test_anchor_2tag_template.sh
new file mode 100644
index 000000000..f97e7f233
--- /dev/null
+++ b/MC/run/ANCHOR/tests/test_anchor_2tag_template.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+#JDL_OUTPUT=*.txt@disk=1,AO2D.root@disk=2,*.log@disk=1,*stat*@disk=1,*.json@disk=1,debug*tgz@disk=2
+#JDL_ERROROUTPUT=*.txt@disk=1,AO2D.root@disk=2,*.log@disk=1,*.json@disk=1,debug*tgz@disk=2
+#JDL_PACKAGE=%{SOFTWARETAG_SIM}
+#JDL_REQUIRE=%{JDL_REQUIREMENT}
+
+#
+# A template anchoring script to test various anchoring setups
+# and software combinations
+#
+
+# only relevant if executed locally
+if [ ! "${O2_ROOT}" ]; then
+  source <(/cvmfs/alice.cern.ch/bin/alienv printenv %{SOFTWARETAG_SIM})
+fi
+
+# meta configuration of the job (influences reco config)
+export ALIEN_JDL_LPMPRODUCTIONTYPE=MC
+export ALIEN_JDL_CPULIMIT=8
+
+export ALIEN_JDL_LPMANCHORPASSNAME=%{PASSNAME}
+export ALIEN_JDL_MCANCHOR=%{PASSNAME}
+export ALIEN_JDL_COLLISIONSYSTEM=%{COL_SYSTEM}
+export ALIEN_JDL_LPMPASSNAME=%{PASSNAME}
+export ALIEN_JDL_LPMRUNNUMBER=%{RUN_NUMBER}
+export ALIEN_JDL_LPMANCHORRUN=%{RUN_NUMBER}
+
+export ALIEN_JDL_LPMINTERACTIONTYPE=%{INTERACTIONTYPE}
+export ALIEN_JDL_LPMPRODUCTIONTAG=%{PRODUCTION_TAG}
+export ALIEN_JDL_LPMANCHORPRODUCTION=%{ANCHOR_PRODUCTION}
+export ALIEN_JDL_LPMANCHORYEAR=%{ANCHORYEAR}
+export ALIEN_JDL_O2DPG_ASYNC_RECO_TAG="%{SOFTWARETAG_ASYNC}"
+
+# get custom O2DPG for 2tag treatment (could be used to test different O2DPG branches)
+# git clone https://github.com/AliceO2Group/O2DPG O2DPG
+# export O2DPG_ROOT=${PWD}/O2DPG
+# export ALIEN_JDL_O2DPG_OVERWRITE=${PWD}/O2DPG
+
+# dimension the job
+export NTIMEFRAMES=1
+
+# further configuration of the job
+export ALIEN_JDL_ADDTIMESERIESINMC=0
+export DISABLE_QC=1
+export ALIEN_JDL_MC_ORBITS_PER_TF=10000:10000000:2 # puts just 2 orbits for large enough interaction rates
+export ALIEN_JDL_O2DPGWORKFLOWTARGET="aod"
+
+# select anchoring points
+export PRODSPLIT=${ALIEN_O2DPG_GRIDSUBMIT_PRODSPLIT:-100}
+export SPLITID=${ALIEN_O2DPG_GRIDSUBMIT_SUBJOBID:-50}
+export CYCLE=0
+
+# generator and other sim configuration
+export ALIEN_JDL_ANCHOR_SIM_OPTIONS="%{SIM_OPTIONS}"
+
+# execute MC
+${O2DPG_ROOT}/MC/run/ANCHOR/anchorMC.sh
diff --git a/MC/run/ANCHOR/tests/test_anchor_cases.csv b/MC/run/ANCHOR/tests/test_anchor_cases.csv
new file mode 100644
index 000000000..490b7bba5
--- /dev/null
+++ b/MC/run/ANCHOR/tests/test_anchor_cases.csv
@@ -0,0 +1,22 @@
+# comment (no empty lines allowed)
+%{SOFTWARETAG_SIM},%{SOFTWARETAG_ASYNC},%{PASSNAME},%{COL_SYSTEM},%{RUN_NUMBER},%{INTERACTIONTYPE},%{ANCHOR_PRODUCTION},%{ANCHORYEAR},%{SIM_OPTIONS},%{PRODUCTION_TAG}
+#O2sim::v20250306-1,O2PDPSuite::async-async-v1-01-08-slc9-alidist-async-v1-01-01-1,apass7,p-p,526641,pp,LHC25a9_Plus10,LHC22o,2022,-gen pythia8
+#O2sim::v20250305-1,O2PDPSuite::async-async-v1-01-12-slc9-alidist-async-v1-01-01-1,apass1,p-p,551398,pp,LHC25a7_Plus10,LHC24ag,2024,-gen pythia8
+#O2sim::v20250305-1,O2PDPSuite::async-async-v1-02-10-slc9-alidist-async-v1-02-01-1,apass1,Pb-Pb,559544,PbPb,LHC25c5b,LHC24ar,2024,-gen pythia8 -confKey 'SimCutParams.globalDensityFactor=0.9f'
+#O2sim::v20250806-1,O2PDPSuite::async-async-2024-PbPb-apass1-v2-slc9-alidist-async-2024-PbPb-apass1-v2-1,apass1,Pb-Pb,559544,PbPb,LHC25c5b,LHC24ar,2024,-gen pythia8
+# 2022
+# O2PDPSuite::async-async-2022-pp-apass7-v1,apass7, | 526641
+# 2023
+O2sim::v20250806-1,O2PDPSuite::async-async-2023-PbPb-apass5-v5-slc9-alidist-async-2023-PbPb-apass5-v5-1,apass5,Pb-Pb,544091,PbPb,LHC23zzh,2023,-gen pythia8
+# 2024
+O2sim::v20250806-1,O2PDPSuite::async-async-2024-pp-apass1-v7-slc9-alidist-async-2024-pp-apass1-v7-1,apass1,p-p,553185,pp,LHC24al,2024,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2024-ppRef-apass1-v1-slc9-alidist-async-2024-ppRef-apass1-v1-1,apass1,p-p,559348,pp,LHC24ap,2024,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2024-pbpb-apass2-v3-slc9-alidist-async-2024-pbpb-apass2-v3-1,apass2,Pb-Pb,559545,PbPb,LHC24ar,2024,-gen pythia8
+# 2025
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-pO-apass1-v2-slc9-alidist-async-2025-pO-apass1-v2-1,apass1,p-O,564251,pO,LHC25ad,2025,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-OO-apass1-v2-slc9-alidist-async-2025-OO-apass1-v2-1,apass1,O-O,564356,OO,LHC25ae,2025,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-NeNe-apass1-v2-slc9-alidist-async-2025-NeNe-apass1-v2-1,apass1,Ne-Ne,564468,NeNe,LHC25af,2025,-gen pythia8
+# apass2
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-pO-apass2-v1-slc9-alidist-async-2025-pO-apass2-v1-1,apass2,p-O,564251,pO,LHC25ad,2025,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-OO-apass2-v1-slc9-alidist-async-2025-OO-apass2-v1-1,apass2,O-O,564356,OO,LHC25ae,2025,-gen pythia8
+O2sim::v20250806-1,O2PDPSuite::async-async-2025-NeNe-apass2-v1-slc9-alidist-async-2025-NeNe-apass2-v1-1,apass2,Ne-Ne,564468,NeNe,LHC25af,2025,-gen pythia8
diff --git a/MC/run/ANCHOR/tests/test_looper.sh b/MC/run/ANCHOR/tests/test_looper.sh
new file mode 100755
index 000000000..7a243746b
--- /dev/null
+++ b/MC/run/ANCHOR/tests/test_looper.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+# loops over all test cases and executes them
+
+# Read the CSV file
+INPUT_FILE="test_anchor_cases.csv"
+TEMPLATE_FILE="test_anchor_2tag_template.sh"
+OUTPUT_FILE="test_anchor_generated"
+
+DAILYTAGTOTEST=${1:-O2sim::v20250804-1}
+
+SITES_FILE="test_GRID_sites.dat"
+
+WORKING_DIR="${PWD}/workdir_$(date +%s)_$RANDOM"
+echo "WORKING DIR ${WORKING_DIR}"
+mkdir -p ${WORKING_DIR}
+
+INPUT_FILE_STRIPPED=${WORKING_DIR}/${INPUT_FILE}_clean
+
+REQUIRE_STRING=""
+{
+  while read -r -a values; do
+    if [ ! "${REQUIRE_STRING}" == "" ]; then
+      REQUIRE_STRING="${REQUIRE_STRING} ||"
+    fi
+    REQUIRE_STRING="${REQUIRE_STRING} (other.CE == \"${values}\")"
+  done
+} < ${SITES_FILE}
+REQUIRE_STRING="(${REQUIRE_STRING});"
+
+echo "REQUIRE STRING ${REQUIRE_STRING}"
+
+# strip comments from CSV file
+grep -v '#' ${INPUT_FILE} > ${INPUT_FILE_STRIPPED}
+
+# Read the header line and convert it into variable names
+IFS=',' read -r -a headers < "$INPUT_FILE_STRIPPED"
+
+# Replace placeholders in the header (e.g., %{VAR} → VAR)
+for i in "${!headers[@]}"; do
+  headers[$i]=$(echo "${headers[$i]}" | sed -E 's/#?%\{//;s/\}//g')
+done
+
+# Read and process each subsequent line
+{
+  read # Skip the header line
+
+  count=1 # Counter for output files
+  datestring=$(date +"%Y%m%d_%H%M%S")
+  while IFS=',' read -r -a values; do
+    # Assign each value to its corresponding variable
+    for i in "${!headers[@]}"; do
+      declare "${headers[$i]}"="${values[$i]}"
+    done
+
+    PRODUCTION_TAG="2tagtest_${datestring}_${count}"
+    # Example: Print assigned variables
+    echo "SOFTWARETAG_SIM: $SOFTWARETAG_SIM"
+    echo "SOFTWARETAG_ASYNC: $SOFTWARETAG_ASYNC"
+    echo "PASSNAME: $PASSNAME"
+    echo "COL_SYSTEM: $COL_SYSTEM"
+    echo "RUN_NUMBER: $RUN_NUMBER"
+    echo "INTERACTIONTYPE: $INTERACTIONTYPE"
+    echo "PRODUCTION_TAG: $PRODUCTION_TAG"
+    echo "ANCHOR_PRODUCTION: $ANCHOR_PRODUCTION"
+    echo "ANCHORYEAR: $ANCHORYEAR"
+    echo "SIM_OPTIONS: $SIM_OPTIONS"
+    echo "--------------------------------"
+
+    if [ "${DAILYTAGTOTEST}" ]; then
+      SOFTWARETAG_SIM=${DAILYTAGTOTEST}
+    fi
+
+    OUTPUT_FILE_FINAL="${WORKING_DIR}/${OUTPUT_FILE}_case${count}.sh"
+
+    # create final test script with these values
+    cp "$TEMPLATE_FILE" "${OUTPUT_FILE_FINAL}"
+    for var in "${headers[@]}"; do
+      sed -i "s|%{$var}|${!var}|g" "$OUTPUT_FILE_FINAL"
+    done
+    # put the require spec
+    sed -i "s/%{JDL_REQUIREMENT}/${REQUIRE_STRING}/g" "$OUTPUT_FILE_FINAL"
+
+    # we submit the test to the GRID (multiplicity of 4)
+    # ${WORKING_DIR}/submit_case${count}_${SOFTWARETAG_ASYNC//::/-}
+    echo "${O2DPG_ROOT}/GRID/utils/grid_submit.sh --prodsplit 4 --singularity --ttl 3600 --script ${OUTPUT_FILE_FINAL} --jobname \"anchorTest_${count}\" --wait-any --topworkdir 2tag_release_testing_${SOFTWARETAG_SIM}" > ${WORKING_DIR}/submit_case${count}.sh
+    # TODO: optional local execution with --local option
+
+    ((count++)) # Increment counter for next row
+  done
+} < "${INPUT_FILE_STRIPPED}" #Redirect file input here to avoid subshell issues
+
+cd ${WORKING_DIR}
+
+# now we submit all the jobs in the background and wait for them to return
+for s in `ls submit*.sh`; do
+  echo "submitting ${s}"
+  export GRID_SUBMIT_WORKDIR="${WORKING_DIR}/${s}_workdir"
+  (
+    bash ${s} &> log_${s}
+    echo "Job ${s} returned"
+  ) &
done
+
+# wait for all (GRID) jobs to return
+echo "Waiting for jobs to return/finish"
+wait
+
+# verify / validate the output produced from these jobs
+# The test is successful if at least one subjob from each test
+# produced the AO2D output.
+echo "-- Jobs done ... validating --"
+
+FINAL_SUCCESS=0
+for s in `ls submit*.sh`; do
+  # find output path
+  TEST_OUTPUT_PATH="${WORKING_DIR}/${s}_workdir" # $(grep "Local working directory is" log_${s} | awk '//{print $5}')
+
+  # get the Output path on JAlien from the JDL
+  ALIEN_OUTPUT_FOLDER=$(grep 'OutputDir' ${TEST_OUTPUT_PATH}/*.jdl | cut -d'"' -f2 | sed 's|/[^/]*#.*#.*$||')
+
+  # see if there is an AO2D.root and a workflow.json in one of the jobs in that folder
+  AODS_FOUND=$(alien.py find ${ALIEN_OUTPUT_FOLDER} AO2D.root)
+  WORKFLOWS_FOUND=$(alien.py find ${ALIEN_OUTPUT_FOLDER} workflow.json)
+
+  if [[ -z ${WORKFLOWS_FOUND} || -z ${AODS_FOUND} ]]; then
+    echo "❌ Missing files for case $s"
+    FINAL_SUCCESS=1 # mark as failure
+  else
+    echo "✅ Files found in $s"
+  fi
+done
+
+if [[ ${FINAL_SUCCESS} -eq 0 ]]; then
+  echo "✅ All submissions have required files."
+else
+  echo "❌ Some submissions are missing required files."
+fi
+
+#TODO: echo "-- Cleaning up ... "
+cd ..
+
+exit ${FINAL_SUCCESS}