#!/bin/tcsh

### Scans the inputs and writes raw PCM files for later use by scan-many-eas-niced-raw.
###
### This version checks for the existence of the appropriate output file for any given
### input and skips that input if the output appears to be there already.  This makes it
### possible to effortlessly recover from a crash midway through processing a large set
### of inputs, and also makes it possible to simply add to the corpus if new inputs show
### up after we've scanned the directory---just run it again.  Note if you take a crash
### that leaves a partially-written output file, you'll have to nuke that file by hand,
### since we have no way of knowing that the output was incomplete without regenerating
### it anyway.  Note that we'll only report a dangling link if we were about to generate
### a file; this avoids lots of useless output on reruns (though you'll -still- see
### reports about dangling links that were -never- files, since we of course never wrote
### any output about them!).
###
### First arg is the directory for the results; all later args are inputs, which may
### also be directories (since they're scanned via ls).

# The first argument names the directory that receives the raw PCM output;
# everything after it is an input.  Without a first argument there is nothing
# to shift off, so stop with an explanation before shift can fail.
if ($#argv < 1) then
  echo "An output directory must be given as the first argument."
  exit 1
endif

# Save the directory name before shifting it off, so that $* afterwards holds
# only the inputs.
set outdir = "$1"; shift

# Refuse to run unless the output directory already exists.  The message names
# $outdir rather than $1 because, after the shift, $1 is the first input.
if (! -d "$outdir") then
  echo "$outdir must be a directory."
  exit 1
endif

# Walk every name that ls reports for the remaining arguments and scan each
# one that does not yet have an output file in $outdir.
# NOTE(review): for a directory argument ls prints entry names without the
# directory prefix, so $x is then looked up relative to the current
# directory -- confirm this matches how the script is invoked.
foreach x (`ls $*`)
  set output = $outdir/`basename $x`
  # An output that already exists is taken to mean this input was handled by
  # an earlier run, so it is skipped without any report.
  if (! -e $output) then
    # -l examines the link itself while -e follows it, so a link whose target
    # is missing passes -l and fails -e.  Testing $x directly (instead of the
    # text printed by readlink) lets a relative target be resolved against the
    # link's own directory rather than the current directory, and avoids
    # running readlink on inputs that are not links at all.
    if (-l $x && ! -e $x) then
      echo "`date`	Dangling link:  $x"	# Doesn't point anywhere, so can't scan it.
    else
      pcm-quiet $x > $output
      # Log each scanned input with a timestamp; >>! appends even when
      # noclobber is set and the log does not exist yet.
      echo "`date`	$x" >>! ~/PCM.corpus
    endif
  endif
end
