#!/usr/bin/perl
#
# Convert my categories file to procmail recipes.
#	- Cameron Simpson <cs@zip.com.au> 02mar2001
#
# Release version: 1.5
#

=head1 NAME

cats2procmailrc - generate a procmail recipe from my mail categories file

=head1 SYNOPSIS

cats2procmailrc [B<-D>] [B<-L>] [B<-a> I<alert>] [B<-m> I<maildomains>] [B<-t> I<type>] [B<-T>] E<lt>categories E<gt>procmailrc

=head1 DESCRIPTION

I<cats2procmailrc> reads a human friendly mail category description file
and emits a matching procmail(1) recipe to implement it.
The intent is to have an extremely succinct file
of easily editable one line rules,
generally of the form:

	folder	tag	pattern

I build my B<.procmailrc> like this:

	cats2procmailrc \
		-a 'announce-email "+%"' \
		-t mh -L \
		-m cskk.homeip.net \
		<categories >$HOME/.procmailrc

=head1 OPTIONS

=over 4

=item B<-D>

Insert a variation on the standard formail(1) recipe for deleting duplicates
on a per-folder basis.
For MH and Maildir folders, use a B<.msgid.cache> file within the folder,
otherwise use the file B<MAILDIR/.msgid.cache-I<folder>>.

=item B<-L>

Put the tag into the B<X-Label> header field
instead of as a prefix to the subject line.

=item B<-a> I<alert>

Specify a shell command to run as the alert function (see L<FILE FORMAT> below),
such as mailblink(1).
The character percent (B<'%'>) is replaced by the I<folder> name.

=item B<-m> I<maildomains>

Specify the default mail domain for rule addresses lacking an B<@I<domain>>.
The default comes from the environment variable B<$MAILDOMAIN>.
In fact this may be a list of domains if you have some equvalents,
separated by commas.

=item B<-t> I<type>

Specify the default mailbox type.
If the mailbox exists the type is deduced automatically,
but otherwise uses this default, which may be one of
B<unix>, B<maildir> or B<mh>.
The default is B<unix>.

=item B<-T>

Don't track rule matches.
Normally a C<X-Cats2Procmailrc-Matching-Rule> header
is inserted describing the match rule.
This is done with formail
and so can incur a noticable penalty if slow machines
with lots of messages.
If you're using the B<-L> option
the labels will mostly track stuff for you anyway.

=back

=head1 FILE FORMAT

=head2 LINE FORMAT

Blank lines and lines beginning with an octothorpe ('#') are ignored.

Lines of the form:

	VARIABLE=value

are passed through unchanged.

Lines of the form:

	< filename

interpolate raw procmailrc files.

Lines of the form:

	<< filename

interpolate category files.

Other lines are of the form:

	[+][!]folder tag pattern
	[+][!]who@where tag pattern
	[+][!]|progname tag pattern

A leading plus sign (B<'+'>) means that this rule should be a continue rule,
and not abort processing.

A leading exclaimation mark (B<'!'>) means that mail items matching this rule
should generate an alert.
The alert can be any action you desire, such as the mailblink(1) command;
see the B<-a> option.
For me, this means a single line summary of the mail item
appears in yellow in a small log window
I have at the top of my screen for mail items I should read I<now>.

The I<folder> names the folder in which to place the mail item.
Normal UNIX mail files,
MH style directories and Maildir style directories are autodetetected.
If the I<folder> contains an "at" (B<@>) character
it is considered an email address
and the mail item is forwarded to that address instead of being dropped in a folder.

If instead of a I<folder>
a pipe (`B<|>') and a program name are used
then the message is piped to the specified program.
The I<tag> is supplied as the sole command line argument to the I<progname>,
unless the I<tag> is "B<.>" in which case it is omitted.

The I<tag> is placed at the start of the mail's B<Subject> line for ready recognition in mixed folders
unless the B<-L> option is used,
in which case it is placed in the B<X-Label:> header line.
The special tag dot (".") suppresses this action.

The I<pattern> selects the mail items.

The I<folder> or I<who@where> or I<|progname> may be enclosed in double quotes
if desired,
for example if the folder name or program invocation contains whitespace.
Note that the quotes do not enclose the leading "B<+>" or "B<!>".

=head2 Example Patterns

Firstly, the special pattern "B<.>" always matches.
It's intended as a placeholder for ``continue'' rules that should always fire,
typically to feed every mail item to a program.

The usual form is simply an address (see RFC822 and RFC2822)
to be present in the B<To>, B<CC> or B<BCC> header lines.
If the address string lacks an at ("@") then an at ("@")
and the value of the environment variable B<$MAILDOMAIN>
is appended.
So I have rules like this:

	!attn	Personal	cs
	!attn	Personal	cameron@cskk.homeip.net
	attn	EFF-Cafe-News   cafe-news@eff.org
	crypto  AUCrypto        aucrypto@suburbia.net

matching B<cs@zip.com.au>, B<cameron@cskk.homeip.net>, B<cafe-news@eff.org>
and B<aucrypto@suburbia.net> respectively. The "Personal" items generate an alert.

It is also possible to select on a different header line
by prefixing the I<pattern> string with a header name, viz:

	applix  ApplixWare      sender:owner-applixware-list@applix.com

which selects for an address in the B<Sender> header line.

The I<pattern> may also be an arbitrary regular expression in egrep(1) syntax.
In this case the I<pattern> starts with a slash ("/")
and optionally ends in a slash, viz:

	!adzap  AdZap           subject:/(noads|add?[-  s]*zap|squid_?redir|zapp(er|ing)|wrapzap)/

which matches mail items with subjects regarding my adzap program.

A typical use of the "at" sign is in conjunction with the plus sign above
to copy particular mail items to another account while still delivering locally.
For example:

	+fred@that.isp.com . fred@this.isp.com
	fredmail Fred fred@this.isp.com

to accept mail aimed at B<fred@this.isp.com> and also copy it to B<fred@that.isp.com>.

=head1 ENVIRONMENT

MAILDIR, the directory containing mail folders.

MAILDOMAIN, the local mail domain (eg B<cskk.homeip.net> for my home domain).

=head1 SEE ALSO

procmail(1), procmailrc(1), procmailex(1), egrep(1)

=head1 AUTHOR

Cameron Simpson E<lt>cs@zip.com.auE<gt>

=cut

##BEGIN { use cs::DEBUG; cs::DEBUG::using(__FILE__);
##      }

use strict vars;
use Getopt::Std;

sub readcats($;$);

$::X_Label=0;
$::FolderDups=0;
$::DfltSfx="";		# unix style by default
$::DfltType=UNIX;	# unix style by default
@::DfltDom=$ENV{MAILDOMAIN};
undef $::DfltAlert;
$::TrackRules=1;
$::RcvStore='/usr/lib/nmh/rcvstore';	# path to MH rcvstore command

($::cmd=$0) =~ s:.*/::;
$::Usage="Usage: $::cmd [-a alert] [-m domains] [-t {unix|mh|maildir}] <categories >procmailrc
	-L		Use X-Label: header instead of Subject: for tag.
	-a alert	Command line to generate the alert.
			The mail item is present on standard input.
	-m domains	Select default mail domain(s).
			Default from \$MAILDOMAIN: @::DfltDom
	-t type		Select default mailbox type.
			Default: $::DfltType
	-T		Don't track rule matches by inserting an
			X-Cats2Procmailrc-Matching-Rule header.
";

{ my $badopts=0;

  if (! getopts("a:DLm:t:T"))
  { warn "$::cmd: bad options\n";
    $badopts=1;
  }

  if (defined $::opt_D)
  { $::FolderDups=1;
  }

  if (defined $::opt_L)
  { $::X_Label=1;
  }

  if (defined $::opt_a)
  { $::DfltAlert=$::opt_a;
  }

  if (defined $::opt_m)
  { @::DfltDom=grep(length,split(/[\s,|]+/, $::opt_m));
  }

  if (defined $::opt_t)
  { $::DfltType = uc($::opt_t);

    if ($::DfltType eq UNIX)
    { $::DfltSfx='';
    }
    elsif ($::DfltType eq MAILDIR)
    { $::DfltSfx='/';
    }
    elsif ($::DfltType eq MH)
    { $::DfltSfx='/.';
    }
    else
    { warn "$::cmd: -t: bad mailbox type \"$::DfltType\"\n\tI know: UNIX MAILBOX MH.\n";
      $badopts=1;
    }
  }

  if (defined $::opt_T)
  { $::TrackRules=0;
  }

  if (@ARGV)
  { warn "$::cmd: extra arguments: @ARGV\n";
    $badopts=1;
  }

  die $::Usage if $badopts;
}

$::_INCHANDLE="INCLUDE0";

my $doalert;
my $sfx;
my $type;
my $flags;
my $deliver;
my $cont;
my $lockfile;

readcats(STDIN);

exit 0;

sub readcats($;$)
{ my($FILE,$fname)=@_;

  local($_);

  my $rule;

  CAT:
  while (<$FILE>)
  {
    chomp;
    s/^\s+//;

    # pass blanks and comments through almost unchanged
    if (!length || /^#/)
    { print "$_\n";
      next CAT;
    }

    s/\s+$//;

    $rule=$_;

    # VARIABLE=value
    if (/^[a-z]\w*=/i)
    { print "$_\n";
      next CAT;
    }

    # << filename
    if (/^<<\s*/)
    { $_=$';
      s/\s+$//;
      my $incfh = ++$::_INCHANDLE;
      my $path = ( m:^/: ? $_ : defined($fname) ? pathdir($fname)."/$_" : $_ );
      if (! open($incfh, "< $path\0"))
      { warn "$::cmd: line $.: can't open \"$path\": $!\n";
	next CAT;
      }
      readcats($incfh,$_);
      close($incfh);
      next CAT;
    }

    # < filename
    if (/^<\s*/)
    { $_=$';
      s/\s+$//;
      if (! open(INCLUDE, "< $_\0"))
      { warn "$::cmd: line $.: can't open \"$_\": $!\n";
	next CAT;
      }
      print "\n";
      while (defined($_=<INCLUDE>))
      { print;
      }
      close(INCLUDE);
      print "\n";
      next CAT;
    }

    # implicit lockfile by default
    $lockfile='';

    # leading "+" means continue after rule
    $cont='';
    if (/^\+\s*/)
    { $cont='c';
      $_=$';
    }

    # leading "!" means alert this message
    $doalert=0;
    if (/^\!\s*/)
    { $doalert=1;
      $_=$';

      if (! defined $::DfltAlert || ! length $::DfltAlert)
      { warn "$::cmd: line $.: no alert shell command defined!\n";
	$doalert=0;
      }
    }

    # we expect "mailbox tag rule"
    if (! /^("[^"]*"|[^\s"]\S*)\s+(\S+)\s+(\S.*)/)
    { warn "$::cmd: stdin, line $.: bad line: $_\n";
      next CAT;
    }

    my($folder,$key,$ptn)=($1,$2,$3);
#    $folder =~ s/^"(.*)"$/$1/;          # FIXME: rknize: my version of procmail needs the quotes.
    $folder='attn' if $folder eq '.';

    my @hdrs;
    if ($ptn eq '.')
    { undef $ptn;
    }
    else
    {
      if ($ptn =~ /^(\w[-,\w]*):/)
      { $ptn=$';
	@hdrs=split(/,+/, $1);
      }
      else
      { @hdrs=('to','cc','bcc');
      }

      if ($ptn =~ m:^/(.*?)/?$:)
      { $ptn=$1;
	if ($ptn !~ /^\^/) { $ptn="$ptn"; }
      }
      else
      { $ptn.="\@.*".altre(@::DfltDom) if $ptn !~ /\@/;
	$ptn="\\<$ptn\\>";
      }

      $ptn =~ s/\.[^*]/\\$&/g;
      $ptn=".*$ptn";
    }

    if ($folder =~ /^\|/)
    { $deliver=($key eq '.' ? $folder : "$folder '$key'");
      undef $lockfile;
    }
    elsif ($folder =~ /\@/)
    { $deliver="! $folder";
      undef $lockfile;
    }
    else
    { $type=foldertype($folder);
      
## Cease use of rcvstore - supposedly procmail can do this without locks.
##      if ($type eq MH)
##      # procmail doesn't do .mh_sequences support
##      { $deliver="| $::RcvStore +$folder -unseen";
##	undef $lockfile; ## $lockfile="$ENV{MAILDIR}/$folder/.procmail-lock";
##      }
##      else
      undef $lockfile;
      { $sfx=typesfx($type);
	$deliver="$folder$sfx";
      }
    }

    $flags=($deliver =~ /^\|/ ? "w" : "");

    print ": 0$cont\n";
    if (defined($ptn))
    { my $hregexp = altre(@hdrs);
      print "* ^$hregexp:$ptn\n";
    }
    print "{\n";

    if ($::FolderDups && defined($lockfile) && $deliver !~ /^\!/)
    {
      my $cache = ( $type eq MH || $type eq MAILDIR
		  ? "$ENV{MAILDIR}/$folder/.msgid.cache"
		  : "$ENV{MAILDIR}/.msgid.cache-$folder"
		  );
      print <<X

  :0 Wh: $cache.lock
  | formail -D 819200 $cache
X
      ;
    }

    if ($::TrackRules)
    {
      print <<X

  :0whf
  | formail -f -A 'X-Cats2Procmailrc-Matching-Rule: $rule'
X
      ;
    }

    if ($doalert)
    { my $alertfn = $::DfltAlert;
      $alertfn =~ s/\%/$folder/g;
      print <<X

  :0hc
  | $alertfn
X
      ;
    }

    if ($key ne '.')
    # note that we strip the typical mailing list [tag] stuff if using our own key
    {
      if ($::X_Label)
      {
	print <<X

  :0whf
  | formail -f -A 'X-Label: $key'
X
	;
      }
      else
      {
	print <<X

  :0whf
  | sed -e 's/^Subject: *\\[[^ ]*\\] */Subject: /' -e 's/^Subject: *[Rr][Ee] *: *\\[[^ ]*\\] */Subject: Re: /' -e 's/^Subject:/& [$key]/'
X
	;
      }
    }

    if (defined $lockfile)
    { $flags.=":$lockfile";
    }

    print <<X

  : 0$flags
  $deliver
X
    ;

    print <<X
}

X
    ;
  }
}

sub typesfx
{ my($type)=@_;

  if ($type eq UNIX)	{ return ""; }
  if ($type eq MAILDIR)	{ return "/"; }
  if ($type eq MH)	{ return "/."; }
  die "$0: can't determine folder suffix for type \"$type\"";
}

sub foldertype
{ my($folder)=@_;

  $folder="$ENV{MAILDIR}/$folder" unless $folder =~ m:^/:;
  if (! stat($folder))
  { return $::DfltType;
  }

  if (! -d _)
  { return UNIX;
  }

  if (-e "$folder/.mh_sequences")
  { return MH;
  }

  return MAILDIR;
}

sub pathdir
{ local($_)=@_;
  return "." unless m:/:;
  s:/[^/]*$::;
  return $_;
}

sub altre
{ if (! @_)
  { my@c=caller;
    die "altere() with no args from [@c]";
  }
  return $_[0] if @_ == 1;
  return '('.join('|',@_).')';
}
