#!/bin/bash

### Scans the directory named in $1 for new files appearing.
### For each one, executes $2.  The file will be the first
### arg given to the executed command.  Any args in $3 and
### later will be supplied as further arguments to $2, -after-
### the file we've discovered.  Note that the command is always
### executed in the background, so it won't hang up our processing
### loop.  [If it does I/O, it may hang!  We do try to redirect it
### all to /dev/null or a logfile, but still...]

### -Every- new file detected will be passed to the command.
### If you want to filter out only particular types of files,
### or hand different types to different processors, that's
### the responsibility of the command that was called.

### Note that this will notice ONLY regular files!  In particular,
### we're NOT noticing links---otherwise, what happens is that every
### time the migrator moves a file, we notice it and scan it -again-
### (this time over the network, just -after- it finished getting moved),
### and will (a) alert -twice- on an EAS file, or (b) read most or all
### of the file (depending on whether we can haul it back over the
### network before the inactivity timer fires on the symlink and we
### decide the file is dead).  Yucko all around.

## It's expected that the files we see are being constantly written
## (by mythbackend) -and read- (by pcm-tail-follow), so we can't use
## any of atime, ctime, or mtime to figure out what's going on.  (If
## POSIX had a true creation-time, that might do, but it doesn't.)
## Thus, here's what we do instead:
##
## We periodically scan the directory (at intervals dictated by the
## scan_period) for any file that appears to have an mtime newer than
## live_period.  Any such file is checked against our temporary directory
## (stored in a tmpfs so it's cleared on boot); if it also appears there,
## then we've noticed this file before, so all we do is retouch its shadow
## in the tempdir for later deadness detection.  If the file isn't there,
## then we've noticed this file for the first time, so we (a) execute the
## command, and (b) enter this file in our tempdir.
##
## When a file stops being updated for more than live_period, we'll stop
## touching its shadow in the tempdir.  We wait until the shadow is at
## least dead_period in the past, and at that point, we assume that nothing
## else will be writing to the file, i.e., that it's finished.  We actually
## make this determination, not by scanning the original directory for
## all dead files (which might be a -very- large list, almost all of which
## have been dead "forever" and which we aren't doing any processing on and
## thus don't care about), but instead by checking the shadow in the tempdir,
## which the liveness detector has been regularly touching on each scan.

## For each dead file found in this way, we delete our entry in the tempdir,
## because we know we won't notice it again (since it's too old for the
## liveness detector).  Note that we -cannot- try to "optimize" this by
## having whatever is scanning the file delete the tempdir entry itself,
## because (for instance), if eas_detect finds an EAS alert anywhere in
## the file, it returns early, so its caller knows as soon as possible
## about the alert.  If the scanner were to delete the tempdir entry at
## that point, the very next scan here would spuriously decide that the
## file was still alive (after all, it's not in the tempdir and it's being
## written to), and it would call the scanner on it -again-, hence looping
## over such a file many times until it finally stopped being written.
##
## For this to work, scan_period must be less than live_period.
## (Actually, scan_period includes not only the sleep but the amount of
## time it takes us to look for live & dead entries.  However, it's hard
## to believe this won't be a negligible difference unless live_period is
## ridiculously small.)  We could set dead_period to scan_period + epsilon
## and it would also -probably- work, but it's safer to just make dead_period
## a minute and then we really know the file is dead if (a) it's been non-live
## long enough for its shadow to stop getting touched and (b) the shadow is
## now -also- old enough to exceed dead_period.

# Tunables.  scan_period is in seconds; live_period and dead_period are
# in minutes.
scan_period=10   # How long to delay in between each check.
live_period=1    # A file younger than this is still alive.
dead_period=10   # A file older than this + live_period is now dead.
                 # See comment in file-being-written-p for why this is now 10.

# Where to keep track of our shadow entries: one directory per running
# instance, named after our own PID.
tmp="/dev/shm/nnf.$$"

# This lives until the next boot, since we don't exit until then.
mkdir -p "$tmp"

# $1 is the directory to scan and $2 is the command to execute.  Whatever
# is left after those two is flattened into a single space-separated
# string of extra arguments.
dir=$1
shift
cmd=$1
shift
rest="$*"

# findlive DIR MINUTES
#   Prints, one per line, every regular file directly inside DIR (no
#   recursion, DIR itself excluded) that was modified less than MINUTES
#   minutes ago and is not empty.
#   Empty files are skipped so we don't jump the gun on a file with
#   nothing in it yet.  Note the "-" in front of the age: "newer than".
#   Both arguments are quoted so a DIR containing whitespace is handed to
#   find as a single path.
findlive() {
  local dir=$1
  local max_age_min=$2
  find "$dir" -maxdepth 1 -mindepth 1 -type f -mmin "-${max_age_min}" -not -empty -print
}
# finddead DIR MINUTES
#   Prints, one per line, every regular file directly inside DIR (no
#   recursion, DIR itself excluded) whose mtime is more than MINUTES
#   minutes in the past.  Note the "+" in front of the age: "older than".
#   All files in $tmp are zero-length, so don't do "-not -empty" here!
#   Both arguments are quoted so a DIR containing whitespace is handed to
#   find as a single path.
finddead() {
  local dir=$1
  local min_age_min=$2
  find "$dir" -maxdepth 1 -mindepth 1 -type f -mmin "+${min_age_min}" -print
}

# echo "Using dir:  $dir"
# echo "Executing command:  $cmd"
# echo "Rest arg:  $rest"

# Main scan loop; it never terminates.  Each pass:
#   1. Lists the live files in $dir.  For any whose shadow does not yet
#      exist in $tmp, launches $cmd on it in the background; then
#      (re)touches the shadow either way.
#   2. Lists the shadows in $tmp that have gone untouched for more than
#      dead_period minutes and removes them.
#   3. Sleeps scan_period seconds.
# The find output is consumed one line at a time rather than being
# word-split, so a file name containing spaces or glob characters is
# handled as a single path.  (A name containing a newline is still not
# supported, since the lists are newline-delimited.)
while true; do
  live=$(findlive "$dir" "$live_period")
# echo "$(date)  Live:  $live"			### DEBUG.
  if [[ -n "$live" ]]; then
    # Read the list on fd 3 so that nothing inside the loop body can
    # accidentally consume it from stdin.
    while IFS= read -r item <&3; do
      shadow="$tmp/${item##*/}"		# Same name as the file, but inside $tmp.
      if [[ ! -e "$shadow" ]]; then
#       $cmd "$item" $rest			### DEBUG.  Run in foreground.
        # $cmd and $rest are deliberately left unquoted: both are plain
        # strings that must be word-split into command + arguments.
        # fd 3 is closed for the child, which has no use for our list.
        # shellcheck disable=SC2086
        ( $cmd "$item" $rest 3<&- & ) >> "$tmp.log" 2>&1	### DEBUG.  Log any stray output.
      fi					### (Use /dev/null in production.)
      touch "$shadow"
    done 3<<< "$live"
  fi
  dead=$(finddead "$tmp" "$dead_period")
# echo "$(date)  Dead:  $dead"			### DEBUG.
  if [[ -n "$dead" ]]; then
    while IFS= read -r item; do
      # Rebuild the path from $tmp + basename so we only ever delete
      # entries that live directly inside our own shadow directory.
      rm "$tmp/${item##*/}"
    done <<< "$dead"
  fi
  sleep "$scan_period"
done

# End of file.
