# Review preamble (text ahead of the first "diff --git" header).
# The patch below deletes coastal/calib/schism_calib.cfg and reworks
# coastal/calib/sing_run.bash: it switches to `set -euox pipefail`, validates the
# variables read from the sourced config, points the conda and Open MPI paths at
# $NFS_MOUNT, and folds the repeated `singularity exec` calls into
# run_in_container / run_in_container_mpi.
# Adjustments applied to the added ("+") lines; no hunk line count changes:
#   - the validation checks read ${!var:-} and ${USE_TPXO:-}, so an unset variable
#     reaches the "ERROR: ..." message instead of aborting on `set -u`;
#   - the conda hook assignment ends in `&& true`, so a failing hook keeps its
#     status in $? for the following `if [ $? -eq 0 ]` test instead of tripping
#     `set -e` before the fallback branch can run.
# NOTE(review): run_in_container_mpi runs `mpiexec -n ${NPROCS}` where the removed
#   MPICOMMAND3 used `mpiexec -n 4` - confirm the larger rank count is intended.
# NOTE(review): schism_calib.cfg is deleted here yet still sourced by the script,
#   and the deleted copy had FCST_LENGTH_HRS=3.0 and no RAW_DOWNLOAD_DIR, both of
#   which the new checks reject - presumably the file is now generated elsewhere;
#   verify.
diff --git a/coastal/calib/schism_calib.cfg b/coastal/calib/schism_calib.cfg deleted file mode 100755 index 2bc61e4b..00000000 --- a/coastal/calib/schism_calib.cfg +++ /dev/null @@ -1,8 +0,0 @@ -export STARTPDY=20230611 -export STARTCYC=00 -export FCST_LENGTH_HRS=3.0 -export HOT_START_FILE='' -export USE_TPXO="NO" -export COASTAL_DOMAIN=pacific -export METEO_SOURCE=NWM_RETRO -export COASTAL_WORK_DIR=/efs/schism_use_case/sfincs_config_test/schism_2023-06-11T00-00-00Z diff --git a/coastal/calib/sing_run.bash b/coastal/calib/sing_run.bash index 27c7d5dc..b78d1c07 100755 --- a/coastal/calib/sing_run.bash +++ b/coastal/calib/sing_run.bash @@ -1,19 +1,53 @@ #!/usr/bin/env bash -#SBATCH --job-name=sing_mpi #job name -#SBATCH -N 2 #number of nodes to use -#SBATCH --partition=compute #the patition -#SBATCH --ntasks-per-node=18 #numebr of cores per node +#SBATCH --job-name=sing_mpi # job name +#SBATCH -N 2 # number of nodes to use +#SBATCH --partition=c5n-18xlarge # the partition +#SBATCH --ntasks-per-node=18 # number of cores per node #SBATCH --exclusive -export NODES=2 #this must match the number of nodes defined above by slurm -export NCORES=18 #this must match the number of cores per node defined above by slurm +export NODES=2 # this must match the number of nodes defined above by slurm +export NCORES=18 # this must match the number of cores per node defined above by slurm export NPROCS=$((NODES*NCORES)) -set -x +set -euox pipefail -#load the configuration file +# load the configuration file . ./schism_calib.cfg +# Check string variables +for var in STARTPDY STARTCYC COASTAL_DOMAIN METEO_SOURCE COASTAL_WORK_DIR RAW_DOWNLOAD_DIR; do + if [[ -z "${!var:-}" ]]; then + echo "ERROR: $var is not defined in config file" + exit 1 + fi +done + +# Check numeric variables +for var in FCST_LENGTH_HRS; do + if [[ -z "${!var:-}" ]] || ! 
[[ "${!var}" =~ ^[0-9]+$ ]]; then + echo "ERROR: $var must be a positive integer" + exit 1 + fi +done + +# Check YES/NO variables +if [[ "${USE_TPXO:-}" != "YES" ]] && [[ "${USE_TPXO:-}" != "NO" ]]; then + echo "ERROR: USE_TPXO must be YES or NO" + exit 1 +fi + +# Check that HOT_START_FILE is defined (but allow empty string) +if [[ ! -v HOT_START_FILE ]]; then + echo "ERROR: HOT_START_FILE must be defined (can be empty string '')" + exit 1 +fi + +# Optional: Validate HOT_START_FILE exists if provided +if [[ -n "${HOT_START_FILE}" ]] && [[ ! -f "${HOT_START_FILE}" ]]; then + echo "ERROR: HOT_START_FILE specified but file does not exist: ${HOT_START_FILE}" + exit 1 +fi + export NGWPC_COASTAL_PARM_DIR=/ngen-test/coastal/ngwpc-coastal export NGEN_APP_DIR=/ngen-app @@ -50,42 +84,44 @@ export FI_OFI_RXM_SAR_LIMIT=3145728 export FI_MR_CACHE_MAX_COUNT=0 export FI_EFA_RECVWIN_SIZE=65536 -# User specific aliases and functions +export NFS_MOUNT=/ngen-test + # >>> conda initialize >>> # !! Contents within this block are managed by 'conda init' !! -__conda_setup="$('/opt/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" +__conda_setup="$($NFS_MOUNT/ngen-app/conda/bin/conda 'shell.bash' 'hook' 2> /dev/null)" && true if [ $? -eq 0 ]; then eval "$__conda_setup" else - if [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then - . "/opt/conda/etc/profile.d/conda.sh" + if [ -f "$NFS_MOUNT/ngen-app/conda/etc/profile.d/conda.sh" ]; then + . "$NFS_MOUNT/ngen-app/conda/etc/profile.d/conda.sh" else - export PATH="/opt/conda/bin:$PATH" + export PATH="$NFS_MOUNT/ngen-app/conda/bin:$PATH" fi fi unset __conda_setup # <<< conda initialize <<< -# -export NFS_MOUNT=/efs -export PATH=/opt/conda/bin:${PATH} +export PATH=$NFS_MOUNT/ngen-app/conda/bin:${PATH} export CONDA_ENVS_PATH=$NFS_MOUNT/ngen-app/conda/envs export CONDA_ENV_NAME=ngen_forcing_coastal export PATH=${CONDA_ENVS_PATH}/${CONDA_ENV_NAME}/bin:${PATH} SIF_PATH=/ngencerf-app/singularity/ngen-coastal.sif +if [[ ! 
-f $SIF_PATH ]]; then + echo "ERROR: Singularity image file not found at $SIF_PATH" + exit 1 +fi conda activate ${CONDA_ENVS_PATH}/$CONDA_ENV_NAME -export LD_LIBRARY_PATH=/opt/conda/lib:${CONDA_ENVS_PATH}/lib:$LD_LIBRARY_PATH - +export LD_LIBRARY_PATH=$NFS_MOUNT/ngen-app/conda/lib:${CONDA_ENVS_PATH}/lib:${LD_LIBRARY_PATH:-} # -# location of the NWM retrospective or archieved forcing files +# location of the NWM retrospective or archived forcing files # note that the time span of the files must cover the whole simulation period #export NWM_FORCING_DIR=/efs/schism_use_case/hi_nwm_ana_forcing_20240913/ export NWM_FORCING_DIR=$RAW_DOWNLOAD_DIR/meteo/${METEO_SOURCE,,} #to lower case # -# location of the NWM retrospective or archieved streamflow files +# location of the NWM retrospective or archived streamflow files # note that the time span of the files must cover the whole simulation period if [[ ${METEO_SOURCE} == "NWM_RETRO" ]]; then export NWM_CHROUT_DIR=$RAW_DOWNLOAD_DIR/streamflow/nwm_retro @@ -96,9 +132,6 @@ else exit 1 fi -export MPICOMMAND2="mpiexec -n ${NPROCS} " -export MPICOMMAND3="mpiexec -n 4 " - declare -A coastal_domain_to_inland_domain=( \ [prvi]="domain_puertorico" \ [hawaii]="domain_hawaii" \ @@ -117,6 +150,12 @@ declare -A coastal_domain_to_geo_grid=( \ [atlgulf]="geo_em_CONUS.nc" \ [pacific]="geo_em_CONUS.nc" ) +# Validate COASTAL_DOMAIN is a recognized domain +if [[ -z "${coastal_domain_to_inland_domain[$COASTAL_DOMAIN]+x}" ]]; then + echo "ERROR: Unknown COASTAL_DOMAIN: $COASTAL_DOMAIN (valid: ${!coastal_domain_to_inland_domain[*]})" + exit 1 +fi + export SCHISM_ESMFMESH=${PARMnwm}/coastal/${COASTAL_DOMAIN}/hgrid.nc export GEOGRID_FILE=${PARMnwm}/${coastal_domain_to_inland_domain[$COASTAL_DOMAIN]}/${coastal_domain_to_geo_grid[$COASTAL_DOMAIN]} @@ -132,6 +171,16 @@ export BINDINGS="$NFS_MOUNT,$CONDA_ENVS_PATH,$NGWPC_COASTAL_PARM_DIR,/usr/bin/bc work_dir=${NGEN_APP_DIR}/ngen-forcing/coastal/calib +MPICOMMAND="mpiexec -n ${NPROCS}" + 
+run_in_container() { + singularity exec -B "$BINDINGS" --pwd "${work_dir}" "$SIF_PATH" "$@" +} + +run_in_container_mpi() { + ${MPICOMMAND} singularity exec -B "$BINDINGS" --pwd "${work_dir}" "$SIF_PATH" "$@" +} + start_itime=$(date -u -d "${STARTPDY} ${STARTCYC}" +"%s") end_itime=$(( $start_itime + $FCST_LENGTH_HRS * 3600 + 3600 )) export start_dt=$(date -u -d "@${start_itime}" +"%Y-%m-%dT%H-%M-%SZ") @@ -142,27 +191,6 @@ if [[ $USE_TPXO == "YES" ]]; then export COASTAL_SOURCE='' fi -#singularity exec -B $BINDINGS --pwd ${work_dir} $SIF_PATH \ -# /bin/bash -c \ -# '__conda_setup="$('/opt/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"; \ -# if [ $? -eq 0 ]; then \ -# eval "$__conda_setup"; \ -# else \ -# if [ -f "/opt/conda/etc/profile.d/conda.sh" ]; then \ -# . "/opt/conda/etc/profile.d/conda.sh" ; \ -# else \ -# export PATH="/opt/conda/bin:$PATH" ;\ -# fi; \ -# fi; \ -# conda run -n $CONDA_ENV_NAME python \ -# ../forcing_downloader/main.py \ -# "$COASTAL_WORK_DIR" \ -# "${coastal_domain_to_nwm_domain[$COASTAL_DOMAIN]}" -# "$start_dt" \ -# "$end_dt" \ -# "$METEO_SOURCE" "nwm" "$COASTAL_SOURCE"' - -# # location of the archived STOFS file if STOFS data is # going to be used for the boundary nodes export STOFS_FILE='' @@ -170,8 +198,7 @@ if [[ $USE_TPXO == "NO" ]]; then export STOFS_FILE=$(ls -1 $RAW_DOWNLOAD_DIR/coastal/stofs/* | head -n 1) fi -singularity exec -B $BINDINGS --pwd ${work_dir} $SIF_PATH \ - ./run_sing_coastal_workflow_pre_forcing_coastal.bash +run_in_container ./run_sing_coastal_workflow_pre_forcing_coastal.bash export LENGTH_HRS=$FCST_LENGTH_HRS export FORCING_BEGIN_DATE=${STARTPDY}${STARTCYC}00 @@ -186,72 +213,43 @@ export COASTAL_FORCING_OUTPUT_DIR=$DATAexec/coastal_forcing_output export FECPP_JOB_INDEX=0 export FECPP_JOB_COUNT=1 -${MPICOMMAND3} singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ +run_in_container_mpi \ $CONDA_ENVS_PATH/$CONDA_ENV_NAME/bin/python \ 
$USHnwm/wrf_hydro_workflow_dev/forcings/WrfHydroFECPP/workflow_driver.py -singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_post_forcing_coastal.bash +run_in_container ./run_sing_coastal_workflow_post_forcing_coastal.bash -singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_update_params.bash +run_in_container ./run_sing_coastal_workflow_update_params.bash if [[ $USE_TPXO == "YES" ]]; then - singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_make_tpxo_ocean.bash + run_in_container ./run_sing_coastal_workflow_make_tpxo_ocean.bash else export CYCLE_DATE=$STARTPDY export CYCLE_TIME=${STARTCYC}00 - export LENGTH_HRS=$(singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_pre_make_stofs_ocean.bash) + export LENGTH_HRS=$(run_in_container ./run_sing_coastal_workflow_pre_make_stofs_ocean.bash) export ESTOFS_INPUT_FILE=$STOFS_FILE export SCHISM_OUTPUT_FILE=$DATAexec/elev2D.th.nc export OPEN_BNDS_HGRID_FILE=$DATAexec/open_bnds_hgrid.nc - ${MPICOMMAND3} singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ + run_in_container_mpi \ $CONDA_ENVS_PATH/$CONDA_ENV_NAME/bin/python \ $USHnwm/wrf_hydro_workflow_dev/coastal/regrid_estofs.py $ESTOFS_INPUT_FILE $OPEN_BNDS_HGRID_FILE $SCHISM_OUTPUT_FILE - singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_post_make_stofs_ocean.bash + run_in_container ./run_sing_coastal_workflow_post_make_stofs_ocean.bash fi -singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_pre_schism.bash - - -export PATH=/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin +run_in_container ./run_sing_coastal_workflow_pre_schism.bash -export LD_LIBRARY_PATH=/opt/amazon/openmpi/lib:/opt/amazon/openmpi/lib64 +# We want to use the Open MPI 
installed in the NFS_MOUNT instead of Conda's Open MPI +# for running SCHISM +export PATH=$NFS_MOUNT/openmpi/bin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin +export LD_LIBRARY_PATH=$NFS_MOUNT/openmpi/lib export OMPI_ALLOW_RUN_AS_ROOT=1 export OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 -${MPICOMMAND2} singularity exec -B $BINDINGS --pwd $COASTAL_WORK_DIR \ - $SIF_PATH \ +${MPICOMMAND} singularity exec -B "$BINDINGS" --pwd "$COASTAL_WORK_DIR" "$SIF_PATH" \ /bin/bash -c "/ngen-app/nwm.v3.0.6/exec/pschism_wcoss2_NO_PARMETIS_TVD-VL.openmpi $NSCRIBES" - -singularity exec -B $BINDINGS \ - --pwd ${work_dir} \ - $SIF_PATH \ - ./run_sing_coastal_workflow_post_schism.bash - - +run_in_container ./run_sing_coastal_workflow_post_schism.bash