@@ -13,14 +13,18 @@ Like datalad, but not linked to source control versioning
1313 OPTIONS:
1414 -o OUTPUT_FILE output, can repeat
1515 -i INPUT_FILE input file, can repeat
16+ -l FILELIST file to pull list of inputs from
17+ can be named pipe like: <(ls *.nii.gz)
18+ NB. newline in file names will be a problem
1619 -s|--stdout FILE capture stdoutput to FILE
1720 -e|--ifneeded only run if output(s) older than input(s)
1821 mnemonic for 'e': *e*xisting skipped
1922 or if hashes changed
2023 -d|--db DBPATH specify sqlite3 db path. Alt set env PNOTEDB
2124 if git, goes into .pnote.sqlite3
2225 otherwise \$ PWD/.pnote.sqlite3
23- -c|--sidecar write to first output file sidecar .output.pnote
26+ -C|--no-sidecar SKIP writing output file sidecar like .output.pnote
27+ -D|--no-db SKIP writing to DB
2428 -n|--dryrun save as setting DRYRUN=1. show don't do
2529TODO:
2630 * faketime for reproducibility?
@@ -47,35 +51,29 @@ create table prov (
4751create table inf (hash text, stime int, file text, modtime int);
4852create table outf (hash text, stime int, file text);
4953create table meta (inittime datetime, version text, inithost text);
50- insert into meta values (datetime ('now'), '$PNOTEVER ', '$HOSTNAME ');"
54+ insert into meta values (unixepoch ('now'), '$PNOTEVER ', '$HOSTNAME ');"
5155
# Decide which sqlite3 database provenance is written to and publish it in the
# global DBFILE.
#   DO_DB other than 1 -> DBFILE is emptied (the record_* functions return
#                         early on an empty DBFILE)
#   PNOTEDB non-empty  -> used as the database path
#   otherwise          -> <git toplevel>/.pnote.sqlite3, falling back to
#                         $PWD/.pnote.sqlite3 when git reports no repository
# If the chosen path is not readable, $SCHEMA is fed to sqlite3 through the
# `dryrun` wrapper to create it.
set_db(){
  declare -g DBFILE DO_DB
  local db root

  # disabled DB has empty DBFILE
  if [ "${DO_DB:-1}" -ne 1 ]; then
    DBFILE=""
    return 0
  fi

  # PNOTEDB overwrite, otherwise git root, otherwise PWD
  db="${PNOTEDB:-}"
  if [ -z "$db" ]; then
    # silence git's "not a git repository" complaint; the fallback covers it
    root=$(git rev-parse --show-toplevel 2>/dev/null) || root="$PWD"
    db="$root/.pnote.sqlite3"
  fi

  if ! test -r "$db"; then
    echo "# NOTE: make '$db'" >&2
    dryrun sqlite3 "$db" <<< "$SCHEMA"
  fi
  DBFILE=$db
}
7774record_start (){
7875 declare -g DBFILE
76+ [ -z " $DBFILE " ] && return 0
7977 local cmd_hash=" $1 " ; shift
8078 local now=" $1 " ; shift
8179 local cmd=$( printf " %q" " $1 " ) ; shift
9088}
9189record_in (){
9290 declare -g DBFILE
91+ [ -z " $DBFILE " ] && return 0
9392 local h=${1:? cmd hash from start} ; shift
9493 local n=${1:? start time of command} ; shift
9594 # all other inputs are like 'moddate file'
@@ -108,6 +107,7 @@ record_in(){
108107}
109108record_out (){
110109 declare -g DBFILE
110+ [ -z " $DBFILE " ] && return 0
111111 local h=${1:? cmd hash from start} ; shift
112112 local n=${1:? start time of command} ; shift
113113 while [ $# -gt 0 ]; do
@@ -117,111 +117,169 @@ record_out(){
117117}
118118
# Mark a recorded run as finished: set its end time (unix epoch, now) and its
# exit status on the prov row identified by hash + start time.
#   $1  command hash
#   $2  start time of the command
#   $3  exit status of the command
# Does nothing when DBFILE is empty.
record_done(){
  declare -g DBFILE
  [ -z "$DBFILE" ] && return 0
  local h=${1:?cmd hash from start}
  local n=${2:?start time of command}
  local s=${3:?exit status of command}
  # double any single quote so a value cannot terminate its SQL string literal
  local q="'"
  sqlite3 "$DBFILE" "
    update prov set
      etime=unixepoch('now'), status='${s//$q/$q$q}'
    where hash='${h//$q/$q$q}' and stime='${n//$q/$q$q}';"
}
126128
# TODO: show sidecar version?
# Print (tab separated, with a header row) the status, end time and command of
# the most recently finished prov row joined to an outf row whose file matches
# the SQL LIKE pattern in $1.
# Returns 1 without querying when set_db leaves DBFILE empty.
show_db(){
  declare -g DBFILE
  set_db
  if [ -z "$DBFILE" ]; then
    echo "ERROR: no database to show" >&2
    return 1
  fi

  # double single quotes so the pattern stays inside its SQL string literal
  local q="'"
  local pattern=${1:-}
  pattern=${pattern//$q/$q$q}

  # etime holds unix epoch seconds, so it needs the 'unixepoch' modifier to be
  # rendered as 'YYYY-MM-DD HH:MM:SS'
  sqlite3 -separator $'\t' -header "$DBFILE" <<HERE
select
  p.status,
  datetime(p.etime, 'unixepoch') as etime,
  p.cmd
from prov as p
inner join outf as o on p.hash = o.hash
where o.file like '$pattern'
order by p.etime desc
limit 1;
HERE
}
133137
# Command-line state filled in by pnote_parse.
_OUT=()          # output files: every -o, plus the -s capture file
_IN=()           # input files: every -i, plus each line of a -l list
STDOUT=          # -s: file the command's stdout is redirected to
IFNEEDED=0       # -e: set to 1
DO_SIDECAR=1     # -C: set to 0
DO_DB=1          # -D: set to 0
cmd_and_args=()  # everything left after pnote's own options

# Consume pnote's options from the front of "$@" and store them in the globals
# above (-d sets PNOTEDB, -n sets DRYRUN). Parsing stops at `--` or at the
# first word that is not an option; what remains becomes cmd_and_args.
# `show PATTERN` runs show_db and exits with its status.
# An unknown -option prints an error and exits 1.
pnote_parse(){
  declare -g _OUT _IN STDOUT IFNEEDED DO_SIDECAR DO_DB cmd_and_args
  local inlist=()
  while [ $# -ne 0 ]; do
    case "$1" in
      --) shift; break;;  # rest is command
      # no `shift 2` here: it fails when the pattern is missing and would
      # replace show_db's exit status
      show) show_db "${2:-}"; exit;;
      -o|--output)
        _OUT+=("${2:?$1 must be followed by an output file}"); shift 2;;
      --stdout|-s)
        _OUT+=("${2:?$1 must be followed by an output file}")
        STDOUT=$2
        shift 2;;
      -i|--input)
        _IN+=("${2:?$1 must be following by an input file}"); shift 2;;
      -l|--input_list)
        # options must precede the array name; -t drops the trailing newlines
        mapfile -t inlist < "${2:?must have input file list}"
        if [ ${#inlist[@]} -gt 0 ]; then
          _IN+=("${inlist[@]}")
        fi
        shift 2;;

      -d|--db) PNOTEDB=${2:?--db requires db path}; shift 2;;

      -e|--ifneeded) IFNEEDED=1; shift;;
      -n|--dryrun) DRYRUN='echo'; shift;;
      # disable things
      -C|--no-sidecar) DO_SIDECAR=0; shift;;
      -D|--no-db) DO_DB=0; shift;;

      # die on unknown options
      -*)
        echo "ERROR: unknown option $1" >&2; exit 1;;

      # anything else is likely a command
      *) break;;
    esac
  done

  cmd_and_args=("$@")
}
165183
# Print one "MTIME NAME" line per file argument.
#   $1    what to do for a file that does not exist:
#         'error' -> report it on stderr and return 1
#         other   -> print "$1 NAME" in place of a real mtime (e.g. 0)
#   $2..  files to stat
stat_exist(){
  local default=${1:?missing file number. '0' for oldest; 'error' to break}; shift
  local f  # keep the loop variable out of the caller's scope
  for f in "$@"; do
    # `--` so a name starting with '-' is not read as a stat option
    test -e "$f" && stat -c "%Y %n" -- "$f" && continue

    [[ $default == "error" ]] && echo "ERROR: missing file '$f'" >&2 && return 1
    echo "$default $f"
  done
}
# Print the md5 hex digest of the arguments joined by "$*" (newline appended).
mkhash(){
  # A "$USER@$HOSTNAME:$PWD" prefix could be mixed into the hash; that is
  # switched off for now, so the prefix is always empty.
  machine=''
  # md5sum prints "DIGEST  -" for stdin; keep only the digest field
  printf '%s\n' "$machine$*" | md5sum | cut -d' ' -f1
}
175199
176- # cmd_and_args=("$(printf "%q " "$@")")
177- cmd_and_args=(" $@ " )
178- cmd_hash=$( md5sum <<< " $USER@$HOSTNAME:$PWD ${cmd_and_args[*]}" | sed ' s/ -$//' )
179- now=$( date +%s)
180- out_times=(" $now " ) # default for no output: set as always up-to-date
181- in_times=()
182- if [ ${# _IN[@]} -gt 0 ]; then
183- mapfile -t in_times < <( stat_exist error " ${_IN[@]} " | sort -n)
184- [ ${# in_times[@]} -ne ${# _IN[@]} ] &&
185- echo " ERROR: not all input files exist" >&2 &&
186- exit 1
187- fi
188- [ ${# _OUT[@]} -gt 0 ] &&
189- mapfile -t out_times < <( stat_exist 0 " ${_OUT[@]} " | sort -nr) # oldest first
190- newest_in=" ${in_times[0]:- 0// */ } " # remove name, just stat
191- oldest_out=" ${out_times[0]// */ } "
192- # TODO: if no output but still want run if needed?
193- oldest_out=${oldest_out?-0} # no output, make start of time.
194- # if newest output is older than newest input AND cmd_hash is same, nothing to do?
195- if [[ $ifneeded -eq 1 && \
196- -n " ${newest_in} " && -n " ${oldest_out} " && \
197- ${newest_in} -lt ${oldest_out} \
198- ]]; then # `# check cmd_hash?`
199- echo " # not rerunning: input '${in_times[0]} ' $( bc -l <<< " $oldest_out - $newest_in" ) seconds older than output '${out_times[0]} '" >&2
200- exit 0
201- fi
# Map an output file to its sidecar path: the same directory, with the file
# name prefixed by '.' and suffixed by '.pnote'.
#   'path/x.txt' -> 'path/.x.txt.pnote'
find_sidecar(){
  local cmd_output=${1:?output file}
  local dir base sidecar
  dir=$(dirname "$cmd_output")
  base=$(basename "$cmd_output")
  sidecar="$dir/.$base.pnote"
  # drop any trailing slash so the result cannot name a directory
  sidecar=$(sed 's:/\+$::' <<< "$sidecar")
  printf '%s\n' "$sidecar"
}
# Append a "COMMAND # INPUT FILES" line to the sidecar of an output file.
#   $1    output file whose sidecar (see find_sidecar) is appended to
#   $2    the command that was run
#   $3..  input files, written space separated after the '#'
# Returns 0 without writing when DO_SIDECAR is not 1. When the sidecar cannot
# be touched, warns on stderr and returns touch's exit status.
write_sidecar(){
  if [ "${DO_SIDECAR:-1}" -ne 1 ]; then
    return 0
  fi
  local cmd_output=${1:?output file needed}
  local cmd=${2:?command that was run needed}
  shift 2
  local input_files="$*"

  sidecar=$(find_sidecar "$cmd_output")

  # test permissions; `|| s=$?` keeps a failing touch from tripping errexit
  s=0
  touch "$sidecar" 2>/dev/null || s=$?
  if [ $s -eq 0 ]; then
    printf "%s # %s\n" "$cmd" "$input_files" >> "$sidecar"
    return
  fi

  echo "WARNING: cannot write to sidecar '$sidecar' (err $s)" >&2
  return $s
}
202224
203- # nothing to do, just exit
204- if [ -n " ${DRYRUN:- } " ]; then
205- echo " ${cmd_and_args[*]} "
# Entry point. In order:
#   1. parse options (pnote_parse)
#   2. with -e, exit 0 without running when the newest input is older than
#      the oldest output
#   3. with DRYRUN set, print the command and exit 0
#   4. record start/inputs/outputs, run the command, record its exit status
#   5. append to the sidecar of every output file that exists afterwards
# Exits 1 when a listed input file does not exist.
pnote_main(){
  pnote_parse "$@"  # sets _IN _OUT STDOUT IFNEEDED DO_SIDECAR DO_DB cmd_and_args
  # the hash covers every argument given to pnote, options included
  cmd_hash=$(mkhash "$@")
  started=$(date +%s)
  out_times=("$started")  # default for no output: set as always up-to-date
  in_times=()
  if [ ${#_IN[@]} -gt 0 ]; then
    # "mtime name" lines, newest first
    mapfile -t in_times < <(stat_exist error "${_IN[@]}" | sort -nr)
    if [ ${#in_times[@]} -ne ${#_IN[@]} ]; then
      echo "ERROR: not all input files exist" >&2
      exit 1
    fi
  fi
  if [ ${#_OUT[@]} -gt 0 ]; then
    # oldest first; a missing output is reported with mtime 0 and sorts first
    mapfile -t out_times < <(stat_exist 0 "${_OUT[@]}" | sort -n)
  fi

  # keep only the mtime: drop everything from the first space on
  newest_in=${in_times[0]:-0}  # no input: start of time
  newest_in=${newest_in%% *}
  # TODO: if no output but still want run if needed?
  oldest_out=${out_times[0]:-0}  # no output, make start of time.
  oldest_out=${oldest_out%% *}

  # every output is newer than every input: nothing to do
  # TODO: also compare cmd_hash?
  if [[ $IFNEEDED -eq 1 && -n $newest_in && -n $oldest_out &&
        $newest_in -lt $oldest_out ]]; then
    echo "# not rerunning: input '${in_times[0]:-}' $((oldest_out - newest_in)) seconds older than output '${out_times[0]}'" >&2
    exit 0
  fi

  # nothing to do, just exit
  if [ -n "${DRYRUN:-}" ]; then
    echo "${cmd_and_args[*]}"
    exit 0
  fi

  set_db  # DBFILE=sqlite location (PNOTEDB, git root, or PWD)
  record_start "$cmd_hash" "$started" "${cmd_and_args[*]}"
  record_in "$cmd_hash" "$started" "${in_times[@]}"
  record_out "$cmd_hash" "$started" "${_OUT[@]}"

  set +e  # allow the command to fail so its status can still be recorded
  if [ -n "$STDOUT" ]; then
    "${cmd_and_args[@]}" > "$STDOUT"
  else
    "${cmd_and_args[@]}"
  fi
  cmd_status=$?
  set -e
  record_done "$cmd_hash" "$started" "$cmd_status"
  # NOTE(review): cmd_status is recorded but not returned; confirm whether
  # pnote should exit with the wrapped command's status.

  # side cars
  for out in "${_OUT[@]}"; do
    test -z "$out" && continue
    if ! test -r "$out"; then
      echo "# WARNING: '$out' expected but not made!" >&2
      continue
    fi
    write_sidecar "$out" "${cmd_and_args[*]}" "${_IN[@]}" || :
  done
}
226284
227- # TODO: check all output files exist
285+ eval " $( iffmain pnote_main ) "
0 commit comments