#!/bin/bash
#
# workscript.sh - GlueX simulation samples script for production
#                 of benchmark Monte Carlo data on the osg.
#
# author: richard.t.jones at uconn.edu
# version: february 27, 2021
#
# usage: [ run within a GlueX singularity container ]
#        $ ./workscript.sh <seqNo> [<offset>]

# Reconstruction parameters handed to hd_root further down in this script.
nthreads=8          # used for --nthreads and -PNTHREADS
nkeep=100000000     # used for -PEVIO:PHYSICS_BLOCKS_TO_KEEP
nskip=0             # used for -PEVIO:PHYSICS_BLOCKS_TO_SKIP
batch=1             # used for -PJANA:BATCH_MODE

# Choose the data server from the first command-line argument: the host
# number is (<arg> mod 5) + 25. awk is used so that a missing argument
# evaluates as 0 instead of raising a shell arithmetic error.
xrootdserver=$(echo "$1" | awk '{printf("nod%d.phys.uconn.edu",($1%5)+25)}')
xrootdURL="root://$xrootdserver"
# Same host as xrootdURL. This variable is not referenced anywhere else in
# this script.
gsiftpURL="gsiftp://$xrootdserver"
# Upload destination used by save_output: $httpsURL/$remotepath/<name>.
remotepath="/Gluex/rest"
httpsURL="https://grinch.phys.uconn.edu:2843"
# Web location of the job scripts, configuration files and error hook.
inputURL="https://gryphn.phys.uconn.edu/halld/bcal_timing-4-1-2023"
# Download command; it is expanded unquoted at the call sites so that the
# option is split off as a separate word.
wget="wget --ca-directory=/etc/grid-security/certificates"

# Print the command-line synopsis on stdout and terminate the script
# with exit status 1. Takes no arguments and never returns to the caller.
function usage() {
    echo "Usage: workscript.sh <seqNo> [<offset>]"
    echo "  where <seqNo> = job sequence number, 1,2,..."
    echo "        <offset> = job sequence number offset, default 0"
    exit 1
}

# Leave the script, removing the job's working files first.
#
# Arguments: $1 - exit code; empty or 0 means success
#            $2 - text describing the error (used only when $1 is non-zero)
# Globals:   wget, inputURL (read); msg (written on the error path)
#
# On success the working files are deleted and the script exits with 0.
# On error the message is reported to the web server's onerror hook every
# 10 seconds and whatever the server answers is eval'ed in this shell; the
# loop has no exit of its own, so it only ends if the evaluated answer
# breaks out of it or exits.
# NOTE(review): eval of a remote response executes arbitrary commands sent
# by the server - confirm this remote-control hook is intended.
function clean_exit() {
    ls -l
    case "$1" in
        ""|0)
            rm -f setup worklist dana_rest.hddm *.root hd_recon.config
            echo "Successful exit from workscript."
            exit 0
            ;;
    esac
    echo "Error $1 in workscript, $2"
    while :; do
        # blanks are replaced so the message can travel in the query string
        msg=$(sed 's/ /_/g' <<<"Error $1 in workscript, $2")
        eval $($wget -O- "$inputURL/scripts/onerror?msg=$msg" 2>/dev/null)
        sleep 10
    done
    # reached only when the evaluated response broke out of the loop
    rm -f setup worklist dana_rest.hddm *.root hd_recon.config
    exit $1
}

# Upload a local file with gfal-copy, retrying on failure.
#
# Arguments: $1 - name of the file in the current directory to upload
#            $2 - destination path, appended to $httpsURL/$remotepath/
# Globals:   httpsURL, remotepath (read)
# Outputs:   anything gfal-copy wrote to stderr, plus retry messages, on stdout
# Returns:   always 0
#
# A transfer counts as failed when gfal-copy returns non-zero OR writes
# anything to stderr (256 is then added to the reported code). Up to 5
# retries are made, waiting 1, 2, ... 5 minutes between attempts. On
# success the local file is removed; if every attempt fails the file is
# renamed to the basename of $2 and left in the current directory.
function save_output() {
    local src=$1
    local dest=$2
    local maxretry=5
    local retry=0
    local retcode
    while (( retry <= maxretry )); do
        gfal-copy -f --copy-mode streamed "file://$(pwd)/$src" "$httpsURL/$remotepath/$dest" 2>gfal-copy.err
        retcode=$?
        if [[ -s gfal-copy.err ]]; then
            # any stderr output is treated as an error, even with status 0
            cat gfal-copy.err
            retcode=$(( retcode + 256 ))
        fi
        rm -f gfal-copy.err
        if (( retcode == 0 )); then
            rm -- "$src"
            break
        fi
        retry=$(( retry + 1 ))
        if (( retry <= maxretry )); then
            echo "gfal-copy returned error code $retcode, waiting $retry minutes before retrying"
            sleep $(( retry * 60 ))
        else
            echo "gfal-copy returned error code $retcode, giving up"
        fi
    done
    # fall through to allow job file transfer return results, failure not fatal
    if [[ -f "$src" ]]; then
        mv -- "$src" "$(basename -- "$dest")"
    fi
    return 0
}

# Validate the command line: one or two integer arguments are required.
# usage prints the synopsis and exits, so nothing below runs on bad input.
if (( $# < 1 || $# > 2 )); then
    usage
fi
if ! [[ "$1" =~ ^-?[0-9]+$ && "${2:-0}" =~ ^-?[0-9]+$ ]]; then
    usage
fi

# seqNo = <seqNo argument> + <offset, default 0> + 1.
# expr is kept for the sum because it reads values such as 08 as decimal,
# whereas $(( )) would reject them as invalid octal constants.
seqNo=$(expr "$1" + "${2:-0}" + 1)
# zero-padded, three-digit form of seqNo
printf -v seqNo3 '%03d' "$seqNo"

echo "job $seqNo running on $(hostname)"

# Download the environment setup script, the hd_root configuration and the
# list of input files. wget's own diagnostics are discarded; a failure is
# reported through clean_exit with wget's exit code.
$wget -O setup "$inputURL/scripts/setup.sh" 2>/dev/null || clean_exit $? "cannot fetch setup.sh from web server"
$wget -O hd_recon.config "$inputURL/config/hd_recon.config" 2>/dev/null || clean_exit $? "cannot fetch hd_recon.config from web server"
$wget -O worklist "$inputURL/config/worklist.in" 2>/dev/null || clean_exit $? "cannot fetch worklist.in from web server"

# This job's input file is line number seqNo of the worklist. Selecting by
# exact line number yields nothing when seqNo is past the end of the list
# (head | tail would silently hand back the last line instead), and an
# empty selection is treated as an error.
infile=$(awk -v n="$seqNo" 'NR == n { print; exit }' worklist)
if [[ -z "$infile" ]]; then
    clean_exit 1 "no worklist entry for job $seqNo"
fi

source ./setup || clean_exit $? "cannot execute setup.sh"

# Calibration and geometry settings placed in the environment of the
# programs started below. JANA_CALIB_URL is given the same value as
# CCDB_CONNECTION, an sqlite file under /cvmfs.
CCDB_CONNECTION="sqlite:////cvmfs/oasis.opensciencegrid.org/gluex/group/halld/www/halldweb/html/dist/ccdb.sqlite"
JANA_CALIB_URL="$CCDB_CONNECTION"
JANA_GEOMETRY_URL="ccdb://GEOMETRY/main_HDDS.xml"
JANA_CALIB_CONTEXT="variation=default"
export CCDB_CONNECTION JANA_CALIB_URL JANA_GEOMETRY_URL JANA_CALIB_CONTEXT

# Run hd_root on this job's input file, read from the selected xrootd
# server. Every expansion is quoted so that a worklist entry containing
# blanks or glob characters reaches hd_root as a single argument. A
# non-zero exit status is reported through clean_exit.
hd_root -p --config=hd_recon.config \
           --nthreads="$nthreads" \
           -PJANA:BATCH_MODE="$batch" \
           -PEVIO:PHYSICS_BLOCKS_TO_SKIP="$nskip" \
           -PEVIO:PHYSICS_BLOCKS_TO_KEEP="$nkeep" \
           -PNTHREADS="$nthreads" \
           -PTHREAD_TIMEOUT_FIRST_EVENT=3600 \
           -PTHREAD_TIMEOUT=600 \
           -PTRK:SAVE_TRUNCATED_DEDX=1 \
       "$xrootdURL/$infile" || clean_exit $? "hd_root crashed"

# Work out where the results go on the remote side:
#   outdir  - name of the directory that directly contains the input file
#   outrest - input file name with smeared->rest, hd_rawdata->dana_rest
#             and .evio->.hddm
#   outroot - input file name with smeared->bcal_timing,
#             hd_rawdata->bcal_timing and .evio/.hddm->.root
# The dots in the extension patterns are escaped so that they match a
# literal "." only.
outdir=$(echo "$infile" | awk -F/ '{i=NF-1;print $i}')
outrest=$(basename "$infile" | sed -e 's/smeared/rest/' -e 's/hd_rawdata/dana_rest/' -e 's/\.evio/.hddm/')
outroot=$(basename "$infile" | sed -e 's/smeared/bcal_timing/' -e 's/hd_rawdata/bcal_timing/' -e 's/\.evio/.root/' -e 's/\.hddm/.root/')

# Upload both output files, then clean up and exit successfully.
save_output dana_rest.hddm "$outdir/$outrest" || clean_exit $? "save of dana_rest.hddm failed"
save_output hd_root.root "$outdir/$outroot" || clean_exit $? "save of hd_root.root failed"
clean_exit
