#!/bin/sh
#
#  Periodically monitor for jobs which have finished or failed but have
#  not reported an exitcode
#
# usage: parse_fork_log control_dir ...

# Numeric uid of the invoking user. In the code visible here it is only
# referenced by the commented-out debug definition below, where it makes
# the log file name per-user.
id=$(id -u)

# Debug hook: every "$debug <message>" line is a no-op while debug is ":".
# Swap in the commented definition to append the messages to a log file.
#debug='eval echo >> /tmp/parse-fork-log.$id'
debug=:

$debug "run at $(date)"
$debug "options = $@"

# At least one (non-empty) control directory argument is required.
[ -n "$1" ] || exit 1

# Where to store temporary files on gatekeeper
: "${TMP_DIR:=/tmp}"

# Scan one control directory for jobs whose status file mentions INLRMS
# but whose process can no longer be found, and record an exit code for
# each of them in job.<id>.lrms_done.
#
# Arguments: $1 - control directory to scan
# Globals:   debug (read); control_dir, job, rc, exitcode, file and the
#            joboption_* variables are (re)assigned while scanning
#            (plain /bin/sh has no "local")
# Outputs:   diagnostics on stderr; any process line matched by the
#            ps | grep check is printed on stdout; writes
#            <control_dir>/job.<id>.lrms_done
# Returns:   0
scan_control_dir() {
    control_dir=$1

    if [ ! -d "${control_dir}" ]; then
        echo "No control dir $control_dir" >&2
        return 0
    fi

    # The glob must sit outside the quotes: quoted, grep is handed the
    # literal name "job.*.status" and no job is ever found.  grep -l lists
    # each matching file exactly once; sed then reduces the path to the
    # bare job id (text between the last "/job." and the final ".status").
    for job in $(grep -l INLRMS "${control_dir}"/job.*.status 2>/dev/null |
                 sed -e 's/.*\/job\.//' -e 's/\.status$//'); do
        $debug "scanning job = $job"

        # Make sure nothing leaks over from the previously sourced job.
        unset joboption_jobid
        unset joboption_user
        unset joboption_directory

        # An exit code has already been recorded for this job.
        [ -f "${control_dir}/job.${job}.lrms_done" ] && continue

        # The .grami file is sourced to obtain the joboption_* values.
        [ -f "${control_dir}/job.${job}.grami" ] || continue
        .  "${control_dir}/job.${job}.grami"

        [ -z "$joboption_jobid" ] && continue

        $debug "local jobid = $joboption_jobid"
        $debug "local user  = $joboption_user"

        # The job counts as still running when ps shows a line for that
        # pid starting with the job's user name.
        # NOTE(review): joboption_jobid is left unquoted as in the original
        # (it may or may not hold several pids -- confirm).  An empty
        # joboption_user makes the pattern match every line, so such a job
        # is always treated as running -- confirm that is intended.
        ps u $joboption_jobid | grep "^${joboption_user}"
        rc=$?
        $debug "ps returned $rc"
        [ "$rc" -eq 0 ] && continue

        # Look for a recorded exit code, first next to the session
        # directory, then in the control directory; first non-empty wins.
        exitcode=
        for file in "${joboption_directory}.diag" "${control_dir}/job.${job}.diag"; do
            $debug "checking $file"
            [ -f "$file" ] || continue
            exitcode=$(sed -n 's/^exitcode=\([0-9]*\).*/\1/p' "$file")
            $debug "exitcode = [$exitcode] extracted from $file"
            [ -n "$exitcode" ] && break
        done

        # Process is gone and left no exit code behind: report failure.
        if [ -z "$exitcode" ]; then
            echo "Job $job with PID $joboption_jobid died unexpectedly" >&2
            exitcode=1
        fi
        $debug "got exitcode=$exitcode"
        echo "$exitcode" > "${control_dir}/job.${job}.lrms_done"
    done

    return 0
}

# Process every control directory given on the command line.
for control_dir in "$@" ; do
    scan_control_dir "$control_dir"
done

$debug "done, going to sleep"

# Pause for a fixed interval before reporting success.
# NOTE(review): presumably the caller re-runs this script in a loop and the
# pause throttles the scan rate -- confirm against the invoking service.
pause_seconds=120
sleep "$pause_seconds"
exit 0
