#!/usr/bin/env perl
# preplace:  timestamp-preserving regular expression replacement.
# See usage() immediately below for usage information.

sub usage () {
  print STDERR <<END_OF_USAGE;
preplace: timestamp-preserving Perl-style regular expression replacement.
Invoke like:
  preplace [options] oldregex newreplacement [files]
If the files are omitted, all files under the current directory are used.
If a given file is a directory, then all files under it are used.
Args are:
  -name regex  only consider files whose names match the Perl regex;
                 applicable only if a list of files was not explicitly given
  -i.bak       back up each file with extension ".bak"
  -preserve    preserve the old timestamp even if replacement occurs
  --           end of command-line options; regexes follow (optional);
                 use this if OLDREGEX starts with a hyphen
  -help        print this message
  -debug       print debugging output
This is like
  perl -pi -e 's/OLD/NEW/g'
except that it applies to all files recursively and the timestamp on each file
is updated only if the replacement is performed.  If the replacement is not
performed (because the old regular expression did not match in the file), then
the timestamp is unmodified.  Thus, you can call it on as many files as you like
without losing the historical information about when a file was last modified.

END_OF_USAGE
}

# Called from the shell, it could be emulated as:
#   find . -type f -name '*.html' -print \
#     | xargs grep -l -P 'OLD' \
#     | xargs perl -pi.bak -e 's|OLD|NEW|g'
# or as (with sed, not Perl, regular expression syntax):
#   sed -i.bak 's/oldText/newText/g' `grep -ril 'oldText'`

# Problem: it only works for replacements on a single line; the OLD text
# cannot span lines (though the NEW text can).


use strict;
use English;
$WARNING = 1;

use Cwd 'abs_path';
use File::Find;

# use "-debug" switch to set
my $debug = 0;

my $fileregex;
my $backupsuffix = "";
my $preservedate = 0;
my $fromregex;
my $toregex;
my @filelist;

my $shortprogname = $0;
$shortprogname =~ s|.*/||;
my $forhelp = "for help, run:  $shortprogname -help";

while ((scalar(@ARGV) > 0) && ($ARGV[0] =~ /^-/)) {
  my $arg = shift @ARGV;
  if ($arg =~ /^-i/) {
    $backupsuffix = substr($arg, 2);
  } elsif ($arg eq "-name") {
    if (scalar(@ARGV) == 0) {
      die "'-name' option requires an argument; $forhelp\n";
    }
    $fileregex = (shift @ARGV);
  } elsif ($arg eq "-preserve") {
    $preservedate = 1;
  } elsif ($arg eq "--") {
    last;
  } elsif ($arg eq "-help") {
    usage();
    exit();
  } elsif ($arg eq "-debug") {
    $debug = 1;
  } else {
    die "preplace: unrecognized argument '$arg'; if OLDREGEX starts with a hyphen, precede it by '--'; $forhelp\n";
  }
}

if (scalar(@ARGV) < 2) {
  die "Not enough arguments (at least 2 required).\n$forhelp\n";
}

$fromregex = shift @ARGV;
$toregex = shift @ARGV;
if ($debug) {
  print STDERR "fromregex = $fromregex\n";
  print STDERR "toregex   = $toregex\n";
}

# Subroutine to set @filelist.  It's a callback; File::Find ignores its
# return value.
sub collect {
  my $fullname = $File::Find::name;
  if ($debug) {
    print STDERR "collect considering $fullname\n";
    # my $result = (-f $fullname);
    # if (!defined($result)) { $result = 0; }
    # print STDERR "-f $fullname = $result\n";
  }
  # Never consider version control directories.
  # (This needs to be documented and customizable.)
  if ((-d $fullname)
      && ($_ =~ /^(\.bzr|CVS|\.git|\.hg|\.svn)$/)) {
    $File::Find::prune = 1;
  }
  if ((-f $fullname)
      && ((! defined($fileregex)) || ($fullname =~ /$fileregex/o))) {
    if ($debug) {
      print STDERR "collected $fullname\n";
    }
    push(@filelist, $fullname);
  }
}

if (scalar(@ARGV) == 0) {
  push @ARGV, ".";
}

# Must pass an absolute filename, not ".", to collect(), because flags like "-e"
# and "-f" fail if a filename starts with "./" and contains any other "/".
my $pwd = `pwd`;
chomp($pwd);

@filelist = ();
for my $arg (@ARGV) {
  if ($arg eq ".") { $arg = $pwd; }
  $arg =~ s|^\./|$pwd/|;
  # Occurrences of abs_path() are because -f doesn't work on relative paths.
  # It also prevents `perl -i` from converting a symbolic link to a copy of the linked-to file.
  if (-d $arg) {
    find(\&collect, abs_path($arg));
  } elsif (-f abs_path($arg)) {
    push @filelist, abs_path($arg);
  } else {
    print STDERR "argument $arg is neither a file nor a directory\n";
  }
}

if ($debug) {
  print STDERR "filelist: @filelist\n";
}

foreach my $file (@filelist) {
  if ($debug) {
    print STDERR "file: <<$file>>\n";
  }
  if (! open(SEARCH, $file)) {
    print STDERR "Can't open file $file for read: $!\n";
    next;
  }
  my $match = 0;
  my $line;
  while (defined($line = <SEARCH>)) {
    if ($line =~ /$fromregex/o) {
      $match = 1;
      last;
    }
  }
  close(SEARCH);
  if ($match) {
    # Should rewrite in perl rather than invoking external command, I suppose.
    my $delimiter;
    my $fromtoregex = $fromregex . $toregex;
    if ($fromtoregex !~ m|/|) {
      $delimiter = "/";
    } elsif ($fromtoregex !~ m/\|/) {
      $delimiter = "|";
    } elsif ($fromtoregex !~ m/:/) {
      $delimiter = ":";
    } elsif ($fromtoregex !~ m/#/) {
      $delimiter = "#";
    } else {
      die "Cannot choose delimiter; regexes use too many special characters";
    }
    # Quote $file in case it contains whitespace.
    # Unfortunately, if the from or to regex contains a single quote, then the command is mangled.
    my $command = "perl -pi$backupsuffix -e 'use strict; s$delimiter$fromregex$delimiter$toregex${delimiter}g' '$file'";
    if ($debug) {
      print STDERR "command: $command\n";
    }
    my ($atime, $mtime) = (stat($file))[8,9];
    system($command);
    if ($preservedate) {
      utime $atime, $mtime, $file;
    }
  }
}
