#
# Procmail Sanitizer perl script
# (C) 2001 John D. Hardin
# License: GPL/Artistic
# $Id: sanitizer.pl,v 0.16 2001-02-20 20:43:47-08 jhardin Exp jhardin $

use MIME::QuotedPrint;
use MIME::Base64;

# i18n and l10n support
use POSIX;
use Locale::PGetText;

sub initialize {
	if (setlocale(LC_CTIME) ne "C") {
		Locale::PGetText::setLocaleDir("/opt/sanitizer/locale");
		Locale::PGetText::setLanguage(setlocale(LC_CTIME));
	}

	$HOST = $ENV{"HOST"};		# assume procmail environment
	$dbgv = $ENV{"DEBUG_VERBOSE"};
	$dbg = $dbgv || $ENV{"DEBUG"};

	$INFO = gettext("INFO:");
	$CONF = gettext("CONF:");
	$WARN = gettext("WARN:");
	$FATAL = gettext("FATAL:");
	$DEFANG = gettext("DEFANGED");

	$default_policy = "SD";		# fail securely - strip and discard

	# zero out arrays
	@boundariestoolong =
	@gotboundaries =
	@mimeboundaries =
	@newboundaries =
	@nullboundaries =
	@rawboundaries =
	@Policy =
	@Headers = ();

	$sanhdr = "X-Content-Security: [${HOST}] ";

	# Do we have a quarantine mailbox?
	if ($Qbox = $ENV{"SECURITY_QUARANTINE"}) {
		$Qbox =~ s/(^\s+|\s+$)//g;
		die "$CONF \$SECURITY_QUARANTINE ", gettext("is not valid if relative"), "\n" if $Qbox !~ /^\//;
		die "$CONF \$SECURITY_QUARANTINE=\"$Qbox\": ", gettext("not a file"), "\n" unless -f $Qbox;
		die "$CONF \$SECURITY_QUARANTINE=\"$Qbox\": ", gettext("not writable"), "\n" unless -w $Qbox;
	}

	# Do we have a quarantine directory?
	if ($Qdir = $ENV{"SECURITY_QUARANTINE_DIR"}) {
		$Qdir =~ s/(^\s+|\s+$)//g;
		die "$CONF \$SECURITY_QUARANTINE_DIR ", gettext("is not valid if relative"), "\n" if $Qdir !~ /^\//;
		$Qdir =~ s/\/$// if $Qdir ne "/";
		die "$CONF \$SECURITY_QUARANTINE_DIR=\"$Qdir\": ", gettext("not a directory"), "\n" unless -d $Qdir;
		die "$CONF \$SECURITY_QUARANTINE_DIR=\"$Qdir\": ", gettext("not writable"), "\n" unless -w $Qdir;
		$default_policy = "SQ";		# fail securely - strip and quarantine
	}

	# Do we have a filename log?
	if ($Flog = $ENV{"SECURITY_FILENAME_LOG"}) {
		$Flog =~ s/(^\s+|\s+$)//g;
		die "$CONF \$SECURITY_FILENAME_LOG ", gettext("is not valid if relative"), "\n" if $Flog !~ /^\//;
		die "$CONF \$SECURITY_FILENAME_LOG=\"$Flog\": ", gettext("not a file"), "\n" unless -f $Flog;
		die "$CONF \$SECURITY_FILENAME_LOG=\"$Flog\": ", gettext("not writable"), "\n" unless -w $Flog;
		$default_policy .= "L";
	}

}

sub readpolicy {
	# Read the attachment security policy files
	# Create the Policy array
	my($list, $dir, $file, $glob, $policy);

	return if @Policy;	# create it only once

	warn "$INFO ", gettext("reading policies"), "\n" if $dbg;
	if ($list = $ENV{"SECURITY_POLICY"}) {
		if ($dir = $ENV{"SECURITY_POLICY_DIR"}) {
			$dir =~ s/(^\s+|\s+$)//g;
			die "$CONF \$SECURITY_POLICY_DIR ", gettext("is not valid if relative"), "\n" if $dir !~ /^\//;
			$dir =~ s/\/$// if $dir ne "/";
			die "$CONF \$SECURITY_POLICY_DIR=\"$dir\": ", gettext("not a directory"), "\n" unless -d $dir;
			die "$CONF \$SECURITY_POLICY_DIR=\"$dir\": ", gettext("not readable"), "\n" unless -r $dir;
		}
		foreach $file (split(/:/, $list)) {
			$file =~ s/^((\s+)|(\.+\/))+//;
			$file =~ s/\s+$//;
			next unless $file;
			unless ($file =~ /^\//) {
				die "$CONF \$SECURITY_POLICY_DIR ", gettext("not given, cannot find"), " $file\n" unless $dir;
				$file = "${dir}/${file}";
			}
			warn "$INFO ", gettext("policy"), " $file\n" if $dbg;
		        if (open(POLICY,"<$file")) {
				while (<POLICY>) {
					s/#.*//;
					if (($glob, $policy) = /^\s*(\S+)\s+(\S+)/) {
						$policy =~ s/^([a-z]*)O/$1/ig;	# ignore O, we know better
						unless ($policy =~ /^[a-z]*[AMSP]/i) {
							warn "$CONF ", gettext("no valid primary policy in"), " \"$policy\"\n";
							warn "$CONF ...file $file, glob $glob\n";
							warn "$CONF ...", gettext("defaulting to"), " $default_policy\n";
							$policy = $default_policy;
						}
						die "$CONF \$SECURITY_QUARANTINE_DIR ", gettext("not given"), "\n" if $policy =~ /^[a-z]*SQ/i && ! $Qdir;
						die "$CONF \$SECURITY_QUARANTINE ", gettext("not given"), "\n" if $policy =~ /^[a-z]*PQ/i && ! $Qbox;
						die "$CONF \$SECURITY_FILENAME_LOG ", gettext("not given"), "\n" if $policy =~ /^[a-z]*L/i && ! $Flog;
						$glob =~ s/([^\\])\./$1\\./g;
						$glob =~ s/\*/.*/g;
						$glob =~ s/\?/./g;
						push(@Policy, "${glob}\034${policy}");
						warn "$INFO $glob $policy\n" if $dbgv;
					}
				}
				close(POLICY);
			} else {
				warn "$WARN ", gettext("could not open"), " $file: $!";
			}
		}
		if ($dbgv) {
			warn "$INFO *** ", gettext("policy dump"), " ***\n";
			for (@Policy) {
				warn "$INFO $_\n";
			}
		}
	} else {
		warn "$WARN ", gettext("no policy files defined by"), " \$SECURITY_POLICY - ", gettext("defaulting to"), " $default_policy\n";
	}
}

sub checkfilename {
	# see if file has a policy
	# if not, default to $default_policy
	# if a Microsoft Office document, tag for the VBA scanner
	my($filename, $junk) = @_;
	my($glob, $policy, $dec, $enc, $enw, $scan, $handling);

	if ($filename =~ /^=\?.*\?=$/s) {
		# decode RFC2047-encoded text
		warn "$INFO ", gettext("decoding filename"), " \"$filename\"\n" if $dbg;
		$dec = "";

		while (($enc, $enw) = $filename =~ /^=\?[-\w]+\?([a-z])\?([^?\s\n]+)\?=[\s\n]*/is) {
			if ($enc eq "Q" || $enc eq "q") {
				$dec .= decode_qp($enw);
			} elsif ($enc eq "B" || $enc eq "b") {
				$dec .= decode_base64($enw);
			} else {
				warn "MIME: \"$filename\" ", gettext("uses unrecognized text encoding"), " \"$enc\"\n";
				$dec .= $enw;
			}
			$filename =~ s/^=\?[-\w]+\?[a-z]\?[^?\s\n]+\?=[\s\n]*//is;
		}
		$filename = $dec;
		warn "$INFO ", gettext("decoded filename"), " \"$filename\"\n" if $dbg;
		die "$FATAL ", gettext("catastrophic failure in"), " checkfilename()\n" unless $filename;
	}

	$handling = $scan = "";
	$scan = "O" if $filename =~ /\.(do[ct]|xl[swt]|p[po]t|rtf|pps)$/i;

	for (@Policy) {
		($glob, $policy) = split(/\034/);
		if ($filename =~ /^${glob}$/i) {
			$handling = "${scan}${policy}";
			last;
		}
	}

	unless ($handling) {
		warn "$WARN ", gettext("no policy found for"), " $filename - ", gettext("defaulting to"), " $default_policy\n";
		$handling = "${scan}${default_policy}";
	}

	if ($handling =~ /^[a-z]*L/i) {
		if (open (FLOG, ">>$Flog")) {
			print FLOG "[", time(), "] $filename\n";
			close(FLOG);
		}
	}
}

sub setMIMEboundary {
	# set the current MIME boundary string
	# save the previous
	# do length-limiting, etc.
	my($bndry, $junk) = @_;

	if ($gotboundary) {
		push @boundariestoolong, $boundarytoolong;
		push @mimeboundaries, $mimeboundary;
		push @newboundaries, $newboundary;
		push @nullboundaries, $nullboundary;
	}

	$bndry =~ s/(^"|"$)//g;
	$newboundary = $mimeboundary = $bndry;
	$boundarytoolong = $nullboundary = 0;
	$gotboundary++;
	warn "$INFO ", gettext("boundary"), " $gotboundary = \"$mimeboundary\"\n" if $dbgv;

	if ($boundarytoolong = (length($mimeboundary) > 80)) {
		warn "MIME: ", gettext("truncating long boundary string"), "\n";
		$newboundary = substr($mimeboundary,0,64);
	} elsif ($nullboundary = (length($mimeboundary) < 1)) {
		warn "MIME: ", gettext("replacing null boundary string"), "\n";
		$newboundary = "==NULL_MIME_BOUNDARY_SANITIZED-${HOST}-${$}-${gotboundary}==";
	}
	$mimeboundary = quotemeta($mimeboundary);
	warn "$INFO ", gettext("boundary"), " $gotboundary = \"$mimeboundary\"\n" if $dbgv;
}

sub popMIMEboundary {
	$gotboundary--;
	$boundarytoolong = pop @boundariestoolong;
	$mimeboundary = pop @mimeboundaries;
	$newboundary = pop @newboundaries;
	$nullboundary = pop @nullboundaries;
}

sub readheaders {
	# read lines until blank line
	# if line begins with whitespace, append to previous line
	# retain the whitespace and \n for writing it back out
	# add finished line to headers array
	my($hdr);

	@Headers = ();
	$hdr = "";

	while (<>) {
		if (/^\s*$/) {
			if (@Headers) {
				# blank line, end of headers
				if ($hdr) {
					warn "$INFO ", gettext("saved header"), " \"$hdr\"\n" if $dbgv;
					push(@Headers, $hdr);
				}
				return;
			} else {
				# skip blank lines before headers
				next;
			}
		} elsif (/^\s/) {
			# indented - append to prev partial hdr
			$hdr .= " $_";
		} else {
			# normal header line
			if ($hdr) {
				warn "$INFO ", gettext("saved header"), " \"$hdr\"\n" if $dbgv;
				push(@Headers, $hdr);
			}
			$hdr = $_;
		}
	}

	# Eep. should not hit EOF at this point.
	die "$FATAL ", gettext("catastrophic failure in"), " readheaders()\n";
}

sub doRFC822headers {
	# state: reading RFC822 headers and setting up to process body
	my($hdr, $type, $junk, $hasmime, $index);

	warn "$INFO ", gettext("reading RFC822 headers"), "\n" if $dbg;
	readheaders();

	$index = $hasmime = 0;
	foreach $hdr (@Headers) {
		warn "$INFO ", gettext("RFC822 header"), " \"$hdr\"\n" if $dbgv;
		if (($type, $junk) = $hdr =~ /^(Mime-Version|Date|Resent-Date|Message-ID|From|Status)\s*:\s+(.{256,})$/is) {
			warn "RFC822: ", gettext("truncating long header"), " ${type}: $junk\n";
			$hdr = "${type}: " . substr($junk, 0, 255) . "\n";
		}
		if (($type, $junk) = $hdr =~ /^(Subject|Return-Path|X-[-\w]+)\s*:\s+(.{513,})$/is) {
			warn "RFC822: ", gettext("truncating long header"), " ${type}: $junk\n";
			$hdr = "${type}: " . substr($junk, 0, 512) . "\n";
		}
		next if $hasmime;
		if ($hdr =~ /^MIME-Version\s*:\s/i) {
			$hasmime = 1;
			warn "RFC822: ", gettext("message has MIME formatting"), "\n";
			$junk = "X-Security: " . gettext("message sanitized on") . " $HOST\n";
			$junk .= "\t" . gettext("See") . " http://www.impsec.org/email-tools/procmail-security.html\n";
			$junk .= "\t" . gettext("for details.") . " \$Revision: 0.16 $x\$Date: 2001-02-20 20:43:47-08 $x\n";
			splice (@Headers, $index++, 0, $junk);
			unless ($Qdir && $Qbox) {
				$junk = "X-Security: " . gettext("the postmaster has not enabled quarantine support") . "\n";
				splice (@Headers, $index++, 0, $junk);
			}
		}
		$index++;
	}

	if ($hasmime) {
		# okay, sanitize the MIME bits
		doMIMEheaders(TRUE);	# process array rather than reading from input
	} else {
		for (@Headers) {
			print $_;
		}
		print "\n";
	}
}

sub doMIMEheaders {
	# state: reading MIME headers and sanitizing them
	# and setting up to process body
	my($alreadyread, $junk) = @_;
	my($hdr, $index, $mangle_mime_type, $filename, $newfilename, $policy, $recurse822);

	if ($alreadyread) {
		warn "$INFO ", gettext("Processing MIME headers"), "\n" if $dbg;
	} else {
		warn "$INFO ", gettext("Reading MIME headers"), "\n" if $dbg;
		readheaders();
	}

	$MIMEtype = $MIMEsubtype = "";
	$mangle_mime_type = $recurse822 = $index = 0;
	foreach $hdr (@Headers) {
		warn "$INFO ", gettext("MIME header"), " \"$hdr\"\n" if $dbgv;
		if (($MIMEtype, $MIMEsubtype, $junk) = $hdr =~ /^Content-Type\s*:.*\s([-\w]+)\/([^"\s]+)(;.*)?$/is) {
			warn "$INFO Content-Type: $MIMEtype/$MIMEsubtype\n" if $dbgv;
			if ($MIMEtype =~ /message/i && $MIMEsubtype =~ /rfc822/i) {
				warn "$INFO ", gettext("recursing into attached RFC822 message"), "\n" if $dbg;
				$recurse822 = 1;
			}
			if (($junk) = $hdr =~ /boundary[\s\n]*=[\s\n]*(("")|("[^"]+")|([^"]\S+))/is) {
				setMIMEboundary($junk);
				$hdr =~ s/${mimeboundary}/${newboundary}/ if $boundarytoolong;
				$hdr =~ s/boundary[\s\n]*=[\s\n]*""/boundary="${newboundary}"/is if $nullboundary;
			}
		}

	        $hdr =~ s/([^\\])\\"/$1\\\001/g;	# hide escaped quotes to simplify things

		if ($hdr =~ /`\s*`/) {
			warn "MIME: ", gettext("fixing double backquotes"), "\n";
			$hdr =~ s/`\s*`/\\"/g;
		}

		if ($hdr =~ /^Content-[-\w]+[\s\n]*:.*[\s\n](file)?name[\s\n]*=[\s\n]*"[^"]+$/is) {
			warn "MIME: ", gettext("fixing missing close quote on filename"), "\n";
			$hdr =~ s/$/"/;
		}

		while (($junk, $filename) = $hdr =~ /^Content-[-\w]+[\s\n]*:[^"]*("[^"]*"[^"]+)*[\s\n](file)?name[\s\n]*=[\s\n]*([^"\s\n]][^;]+)/is) {
			warn "MIME: ", gettext("fixing unquoted filename"), " \"$filename\".\n";
			$newfilename = $filename;
			$newfilename =~ s/\"/\\"/g;	# escape quotes embedded in filename
			if ($newfilename =~ /\([^)]*\)/) {
				warn "MIME: ", gettext("filename contains embedded RFC822 comment - removing"), "\n";
				$newfilename =~ s/\([^)]*\)//g;
			}
			$filename = quotemeta($filename);
			$hdr =~ s/name[\s\n]*=[\s\n]*${filename}/name="${newfilename}"/ig;
		}

		# TODO revisit this to check RFC2047 encoding side effects
		while (($filename) = $hdr =~ /^Content-[-\w]+[\s\n]*:.*[\s\n](file)?name[\s\n]*=[\s\n]*"([^"]{120})[^"]{16,}"/is) {
			warn "MIME: ", gettext("truncating long filename"), " \"${filename}...\".\n";
			$filename .= "...";
			$filename .= "?=" if $filename =~ /^=\?/;
			$hdr =~ s/name[\s\n]*=[\s\n]*"[^"]{128,}"/name="$filename"/is;
			$mangle_mime_type = 1;
		}

		$newfilename = 0;
		if ($hdr =~ /^Content-Type[\s\n]*:/is) {
			unless ($MIMEtype =~ /^(multipart|text|message)/i) {
				unless ($hdr =~ /^Content-[-\w]+[\s\n]*:.*[\s\n](file)?name[\s\n]*=[\s\n]*"/is) {
					warn "MIME: ", gettext("supplying default filename"), "\n";
					# this could be friendlier (see Anomy)
					$hdr .= "\tname=\"default.txt\";\n";
					$newfilename = 1;
				}
			}
		}

		if (!$newfilename && (($filename) = $hdr =~ /^Content-[-\w]+[\s\n]*:.*[\s\n](file)?name[\s\n]*=[\s\n]*"([^"]+)"/i)) {
			$policy = checkfilename($filename);
			warn "$INFO \"$filename\" ", gettext("policy is"), " \"$policy\"\n" if $dbg;
		}

		$hdr =~ s/\\\001/\\"/g;
		$index++;
	}

	for (@Headers) {
		print $_;
	}
	print "\n";

	if ($recurse822) {
		doRFC822headers();	
	}
}

sub dobodypart {
	# state: reading MIME body part
}

initialize();
readpolicy();

for (@ARGV) {
	print "$_ = ", checkfilename($_), "\n";
}
@ARGV = ();


