#!/usr/bin/perl
#
my $revision = '$Id: Sanitizer.pm,v 1.90 2005/01/04 20:30:13 bre Exp $';
my $version = 'Anomy 0.0.0 : Sanitizer.pm';
#
##  Copyright (c) 2000-2005 Bjarni R. Einarsson. All rights reserved.
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
#
##############################################################################
#
# NOTE:  Sanitizer development is for the most part sponsored by
#        FRISK Software International, http://www.f-prot.com/.  Please
#        consider buying their anti-virus products to show your 
#        appreciation.
#
##############################################################################
#
# This is the email sanitizer engine.
#
# Most of the ideas in this script were borrowed from John D. Hardin's 
# "security through procmail" ruleset, which is available here:
# ftp://ftp.rubyriver.com/pub/jhardin/antispam/procmail-security.html
#
# Note that this script is a little differently licensed from the rest 
# of the Anomy tools because I borrowed GPL'd code from John's script.
#
# Documentation and new versions are here: http://mailtools.anomy.net/
#
#############################################################################
# Function naming conventions used in this file:
#
#   lowercase_names:   public object methods
#   BiCapitlizedNames: private object methods
#   CAPITALIZED_NAMES: functions (not assigned to an object)
#

##[ Package definition ]######################################################

package Anomy::Sanitizer;
use strict;
use Anomy::Log;
use Anomy::MIMEStream;
use Anomy::HTMLCleaner;
use IO::File;
use Digest::MD5;
use bytes;

BEGIN {
    use Exporter  ();
    use vars      qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
    $VERSION      = do { my @r = (q$Revision: 1.90 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
    @ISA          = qw(Exporter);
    @EXPORT       = qw( );
    @EXPORT_OK    = qw( );
};

##[ Default configuration ]###################################################

my $default_config = 
{
    # Features.
    # Disable stuff by replacing 1s with 0s.
    #
    "feat_verbose"    => 1,  # Warn user about unscanned parts and Other Stuff.
    "feat_log_inline" => 2,  # Inline logs: 0 = Off, 1 = Maybe, 2 = Force
    "feat_log_stderr" => 0,  # Print log to stderr.
	"feat_log_xml"    => 0,  # Attachment logs: XML format.
    "feat_log_trace"  => 0,  # Attachment logs: Complete (all trace info).
	"feat_log_after"  => 0,  # Scratch area sizes - see below. 0 = Off.
    "feat_files"      => 1,  # Enable filename-based policy decisions.
	"feat_sane_names" => 1,  # Make sure file names match the content-type.
	"feat_mime_files" => 0,  # Always check the mime-type's default name too.
    "feat_force_name" => 0,  # Force all parts (except text/plain or
                             # text/html parts) to have file names.
    "feat_boundaries" => 0,  # Replace all boundary strings with our own.
                             # NOTE:  Always breaks PGP/MIME messages!
    "feat_lengths"    => 1,  # Protect against buffer overflows.
    "feat_scripts"    => 1,  # Defang incoming shell scripts.
    "feat_html"       => 1,  # Defang active HTML content.
    "feat_html_noexe"    => 0, # Disallow links to executables
    "feat_html_unknown"  => 0, # Allow unknown HTML tags (default allow)
    "feat_html_paranoid" => 0, # Paranoid HTML Cleaner mode
	"feat_webbugs"    => 0,  # Defang "web bugs".
    "feat_trust_pgp"  => 0,  # Trust PGP signed messages -> don't scan them
    "feat_uuencoded"  => 1,  # Sanitized UU encoded attachments.
    "feat_forwards"   => 1,  # Sanitize forwarded messages.
    "feat_testing"    => 0,  # Enable to turn off randomness, for testcases
    "feat_fixmime"    => 1,  # Try and fix invalid MIME.
    "feat_kill_tnef"  => 0,  # Convert MS-TNEF attachments to MIME.
	"feat_no_partial" => 1,  # Defang any incoming message/partial mail. 
    "feat_paranoid"   => 0,  # Be very paranoid about MIME headers etc.
	"feat_newlines"   => 0,  # 0=Auto, 1=lf (Unix), 2=crlf (Win), 3=cr (Mac)
                             # ... 4=no newline mods
	
    "score_bad" => 0,        # Any message requring this many modifications 
	                         # will cause the sanitizer to return a non-zero
                             # exit code after processing the entire message.
							 # 0=off.

    # The "feat_log_after" feature tells the sanitizer to force the message 
	# to be multipart/* and reserve at least N-100 bytes within the header of
	# each text/plain or text/html part.  This allows the resulting output
	# file to be edited to add messages once the sanitization is completed,
	# without having to rewrite the entire file when short messages are
	# inserted.  A log-event named "scratch-space" is generated each time
	# such space is added, to allow a log-hook to record where (give or take
	# a few bytes) within the stream the space was added.

    ##########################################################################
    # If feat_files is non-zero, the following rules will be used to decide
    # what to do with an attachment.  The rules are all filename based, each
    # "list" being a regulaur expression.
    #
    # The file is compared to each list in order (1 to file_list_max) and on
    # the first match the defined policy is enforced.  If a file matches no
    # lists the default policy is used.
    #
    # Valid policies are:
    #
    #   mangle  - Completely ofbuscates the file name.
    #   defang  - Defangs the file name, without making it completely 
    #             illegible.
    #   accept  - Attachment is accepted as-is (possibly subject to 
    #             HTML or shell script defanging though).
    #   save    - Save the attachment to the "file_save_dir" directory,
    #             replace it with an informative message.
    #   drop    - Delete the attachment
    #   unknown - Indeterminate result, check the next policy.
    #   warn    - Same as unknown, but also increments the mod counter.
    #
	# Appending an exclamation mark (!) to any policy (e.g. drop!) will, when
	# matched, increase the modification counter past the score_bad threshold, 
	# to force the sanitizer to return a non-zero exit code.
	#
    # If a policy has four values, e.g. "save:save!:drop:save", then the file 
    # will be scanned for viruses using an external virus scanner.  Which of 
    # the four policies is used then depends on whether the result is "clean"
    # (1st), "successfully disinfected" (2nd), "unsuccessfully disinfected" 
    # (3rd) or "scan failed" (4th).
    #
    # The scanner definitions are as follows:
    #
    #   "e1:e2:e3:/path/to/scanner args ... %FILENAME ..."
    #
    # The e1, e2, e3 are comma-delimited lists of exit codes that match
    # the four different "interesting" return values we exped scanners to 
    # return.  Unexpected values are assumed to be in the "scan failed" 
    # category.
    #
	# In addition to the %FILENAME variable, the following variables are
	# also expanded: 
	#
	#    %ATTNAME         The name of the attachment itself.
	#    %REPLY_TO        The apparent reply-to address for the message.
	#    %ERRORS_TO       The apparent error-address for the message.
	#    %HEADER(<name>)  The named header of the top level message.  The
	#                     name must be in lowercase, ex: %HEADER(subject).
   	#

    # This is the file name template, used for creating (temporary?) files
    # when scanning or saving attachments.  The following substitutions are
    # supported:
    #               $d - Day of month (01-31)
    #               $m - Month number (01-12)
    #               $y - Two digit year (00-99)
    #               $Y - Four digit year
    #               $H - Hour (00-23)
    #               $M - Minute (00-59)
    #               $S - Second (00-59)
    #
    #               $P - This process's PID, in hex.
    #               $T - The current Unix time, in hex.
    #               $F - A safe version of the original file name.
    #               $  - A random character, from [A-Z0-9].
    #
    # It's recommended that all file name templates contain a few '$'
    # characters, since a new name will be generated (up to five times, 
    # after that it will give up) if the chosen one is already in use.  
    # More '$' substitions will mean fewer collisions.  Note that any
    # directories must exist, the sanitizer will NOT create them for you.
    # So if you are using random directory hashing make sure to create 
    # all the directories ahead of time!
    #
    "file_name_tpl" => '/tmp/att-$T-$$$-$F',

    # How many rules are available?
    "file_list_rules" => 15,

	# Black list 1:  Double-extension attacks and known trojans.
	# Upgrading the policy to "drop" or "save" is highly recommended.
    "file_list_1_scanner" => 0,
    "file_list_1_policy"  => "mangle",
    "file_list_1"         => '(?i)(\.'.
                             # Double extension executables files
	                         '([0-9a-z_]{2,4}\.(com|exe|pif|lnk|bat|sc[rt]|vb[se]?))'.
                             # Known trojans/worms currently in the wild.
                             '|(ants3set|wtc|readme|sslpatch)\.exe)\.?$',

    # Reserved for plugging in your favorite virus scanners and custom blacklists.
    "file_list_2_policy"  => 0, "file_list_2_scanner"  => 0, "file_list_2"  => 0,
    "file_list_3_policy"  => 0, "file_list_3_scanner"  => 0, "file_list_3"  => 0,
    "file_list_4_policy"  => 0, "file_list_4_scanner"  => 0, "file_list_4"  => 0,
    "file_list_5_policy"  => 0, "file_list_5_scanner"  => 0, "file_list_5"  => 0,
    "file_list_6_policy"  => 0, "file_list_6_scanner"  => 0, "file_list_6"  => 0,

	# Black list 2:  Executable files.  These /should/ be dropped...
    "file_list_7_scanner" => 0, 
	"file_list_7_policy"  => "defang", 
    "file_list_7"         => '(?i)\.(exe|com|cmd|bat|sys|vb[se]?|hta|shb|shs|hlp'.
	                         '|chm|eml|ocx|wsf|wsh|js|msi|msp|cpl|lib|pif|sc[rt]'.
							 '|lnk|dll)\.?$',

	# Reserved ...
    "file_list_8_policy"  => 0, "file_list_8_scanner"  => 0, "file_list_8"  => 0,

    # White list 1:  Static data - safe if anything is.
    "file_list_9_scanner" => 0,
    "file_list_9_policy" => "accept",
    "file_list_9" => '(?i)\.'.
	    # Graphics
	    '(gif|tiff?|jpe?g|pn[mg]|x[pb]m|dvi|e?ps|p(df|cx|fm)|fdf'.
	    '|fon|[ot]tf|bmp|ico'.
		# Sound
		'|mp\d|wav|au|ram?|avi|mov|mpe?g|aif[fc]?|cda|midi?|asf|wm[avf]'.
		# Plain text, compiled-language source code, HTML, etc.
		'|t(xt|ex)|csv|l(og|yx)|ini'.
	    '|[ch](pp|\+\+)?|cc|hh|s|inc|asm|pa(tch|s)|java|php\d?'.
		'|[ja]sp'.
		'|[sp]?html?|css|xml'.
        # Compressed?
		')(\.[gb]?z\d?)?\.?$',
		
	# Reserved ...
    "file_list_10_policy" => 0, "file_list_10_scanner" => 0, "file_list_10" => 0,

    # White list 2:  Necessary evils: Archives.
    "file_list_11_scanner" => 0,
    "file_list_11_policy" => "accept",
    "file_list_11" => '(?i)\.'.
		'(z(ip|oo)|ar[cj]|lh[az]|[tr]ar|r\d\d|rpm|deb|slp|tgz|cab'.
		'|iso|cif|uue?|jar'.
        # Compressed?
		')(\.[gb]?z\d?)?\.?$',
		
	# Reserved ...
    "file_list_12_policy" => 0, "file_list_12_scanner" => 0, "file_list_12" => 0,
	
    # White list 3:  Necessary evils: Microsoft Office files
    "file_list_13_scanner" => 0,
    "file_list_13_policy" => "accept",
    "file_list_13" => '(?i)\.'.
		# Microsoft Office documents
        '(do[tc]|xl[aswct]|p[po]t|pps|rtf|md[abw]'.
        # Compressed?
		')(\.[gb]?z\d?)?\.?$',
		
	# Reserved ...
    "file_list_14_policy" => 0, "file_list_14_scanner" => 0, "file_list_14" => 0,
	
    # White list 4:  Necessary evils: Miscellanious
    "file_list_15_scanner" => 0,
    "file_list_15_policy" => "accept",
    "file_list_15" => '(?i)\.'.
		# Email-related files - some of these may be somewhat risky.
		'(mbx|vcf|p7[sm]|ics|pgp|gpg|asc'.
		# Misc. data files.
		'|3ds|arg|dwg|dxf|dwt|dng|dbf|dcl|lsp|mp[apdwe]|psd|prc'.
		'|qt|stx|swf'.
        # Compressed?
		')(\.[gb]?z\d?)?\.?$',

    # This defines the default policy, for filenames that don't match
    # any of the preceding lists.
    "file_default_policy" => "defang",
    
    # The default name for files of an unrecognized MIME-type which lack
    # a file-name.  The default is to treat such files as text files, 
    # both for backwards compatibility and to increase the chances that
    # unnamed attachments get treated "safely" by the recpient's MUA.
    "file_default_filename" => "unnamed.txt",

    # Characters permitted in file names - the default is most of the
	# ISO-8859-1 character set.  In addition to these, the characters 
	# "." and "-" are always allowed.  Set to 0 to allow all characters.
    "file_characters" => '\ !\#\%\(\)\+,0-9;=\?A-Z\[\]_a-z\{\}\~'
	                    .''
						.''
						.'',

    # HTML cleaner configuration
	"html_cleaner_body"   => '"tag:div" => "p"',
	"html_cleaner_header" => '',

    # MIME attributes for inline sanitizer logs.
    "sanitizer_log_disp" => 'attachment; filename="sanitizer.log"',
    "sanitizer_log_type" => 'text/sanitizer-log; charset="iso-8859-1"',

    ##########################################################################
	# Headers to add if they are currently missing, since some email 
	# clients expect them to be present.  This only applies to the top-level
	# message headers.

	"force_header_1"  => "MIME-Version: 1.0",
	"force_header_2"  => "Content-Type: text/plain; charset=\"%DEF_CHARSET\"",
	"force_header_3"  => "Subject: (no subject)",
	"force_header_4"  => 0,
	"force_header_5"  => 0,
	"force_header_6"  => 0,
	"force_header_7"  => 0,
	"force_header_8"  => 0,
	"force_header_9"  => 0,
	"force_header_10" => 0,

    # Number of headers we are interested in forcing, 0 disables.
    "force_headers"   => 0,

    ##########################################################################

    # Messages.  Translate?
    #
    "header_info" => "X-Sanitizer: This message has been sanitized!",
    "header_url"  => "X-Sanitizer-URL: http://mailtools.anomy.net/",
    "header_rev"  => "X-Sanitizer-Rev: $revision",

    "var_def_charset" => "iso-8859-1",

    "msg_file_drop" => 
        "****\012NOTE:  An attachment was deleted from this part of the message,\012".
        "because it failed one or more checks by the virus scanning system.\012".
        "See the attached sanitization log for more details or contact your\012".
        "system administrator.\012\012".
        "The removed attachment's name was:\012\012".
        "\t%FILENAME\012\012".
        "It might be a good idea to contact the sender and warn them that\012".
        "their system is infected.\012****\012",

    "msg_file_save" => 
        "****\012NOTE:  An attachment was deleted from this part of the message,\012".
        "because it failed one or more checks by the virus scanning system.\012".
        "The file has been quarantined on the mail server, with the following\012".
        "file name:\012\012".
        "\t%SAVEDNAME\012\012".
        "The removed attachment's original name was:\012\012".
        "\t%FILENAME\012\012".
        "It is recommended that you contact your system administrator if you\012".
        "need access to the file.  It might also be a good idea to contact the\012".
        "sender, and warn them that their system may be infected.\012****\012",

    "msg_pgp_warning" => 
        "WARNING:  The following data has NOT been sanitized, to ensure\012".
        "          that the signature remains intact, if valid.  Please\012".
        "          be careful if you open any enclosed attachments.\012\012",

    "msg_log_prefix" =>
        "This message has been 'sanitized'.  This means that potentially\012".
        "dangerous content has been rewritten or removed.  The following\012".
        "log describes which actions were taken.\012",

    "msg_usage" =>
        "$version\012$revision\012\012".
        "Usage: sanitizer.pl [ 'variable op value' | 'filename' ] ... \012".
        "\012".
        "FIXME:  unwritten\012",
    
    "msg_defanged" => "DEFANGED",
    "msg_blacklisted" => "BLACKLISTED",

    "msg_current" =>
        "Current configuration:\012",

    "msg_signature" => "$version\012$revision\012",

    # Limits
    "max_conf_recursion" => 5,
	"max_mime_depth"     => 20,

    # System mime types...
	"system_mime_types" => "/etc/mime.types",
    "system_io_file"    => "IO::File",
		
	## DEPRACIATED STUFF ##
    "score_bad_code"   => "UNUSED",
    "msg_panic"        => "UNUSED",
    "score_panic_code" => "UNUSED",
    "score_panic"      => "UNUSED",
    "html_evil_tags"   => 'UNUSED',
    "html_javascript"  => 'UNUSED',
};

my @default_configs = 
(
    $default_config,
);

# Default file-names for unnamed MIME parts.
#
my $default_filenames = 
{
    "text/plain"                      => "unnamed.txt",
    "text/html"                       => "unnamed.html",
    "image/gif"                       => "unnamed.gif",
    "image/jpeg"                      => "unnamed.jpg",
    "image/png"                       => "unnamed.png",
    "image/tiff"                      => "unnamed.tiff",
    "inline/text/plain"               => "unnamed.txt",
    "inline/text/html"                => "unnamed.html",
    "application/ms-tnef"             => "winmail.dat",
	"application/x-ms-dos-executable" => "unnamed.exe",
	"application/x-msdownload"        => "unnamed.exe",
};

# This is a list of MIME types, and regular expressions which file-names
# are expected to match if that type is being used.
#
my $default_name_type_checks = 
{
    "audio/x-ms-wma" => '(?i).wm[af]$',
    "audio/x-wav"  => '(?i).wav$',
    "audio/wav"    => '(?i).wav$',
    "audio/x-midi" => '(?i).midi?$',
    "image/gif"    => '(?i).gif$',
    "image/jpeg"   => '(?i).jpe?g$',
    "image/png"    => '(?i).png$',
    "image/tiff"   => '(?i).tiff?$',
};

# Default parser hash
my $default_parsers =
{
    # Search text parts for inline uuencoded attachments, so we can mangle
	# their names and possibly scan the attachments themselves.  This also
	# takes care of defanging HTML.
	"text/*" => \&CleanUnknown,
	"application/pgp" => \&CleanText,
	"inline/text/*" => \&CleanUnknown,

    # Check headers, parse contents...
	"multipart/*" => \&CleanMultipart,

    # Sanitize encapsulated messages.
	"message/rfc822" => \&CleanRFC822,
	"inline/message/rfc822" => \&CleanRFC822,
	"message/partial" => \&CleanPartial,

    # Sanitize contents of MS-TNEF attachments
    "application/ms-tnef" => \&CleanMS_TNEF,
    "inline/application/ms-tnef" => \&CleanMS_TNEF,

    # We don't recognize this content-type, see if we can figure anything
	# out from the headers themselves.  Sanitize headers (at least).
	"DEFAULT" => \&CleanUnknown,
};

# Built in scanners
my $default_scanners = 
{
};


##[ Public interface routines ]###############################################

sub new
{
    my ($proto, $name) = @_;
	my $class = ref($proto) || $proto;

	my $self = {
		# Configuration
		"conf"      => { },
		"defaults"  => [ @default_configs ],
		"parsers"   => { },
		"filenames" => { },
		"scanners"  => { },

		# Object globals
	    "log"         => new Anomy::Log,
		"mod_id"      => 0,
		"base_mod_id" => 0,
		"logd_mod_id" => 0,
		"mime_depth"  => 0,

		# Common part data, exposed.
		"common"      => undef,
		"message"     => undef,
		
		# Have we created any scratch-spaces yet?
		"scratch-spaces" => 0,
		
		# Errors
		"error" => undef,
	};
	bless ($self, $class);

	$self->reset_config();
	%{ $self->{"parsers"} }          = %{ $default_parsers };
	%{ $self->{"filenames"} }        = %{ $default_filenames };
	%{ $self->{"name_type_checks"} } = %{ $default_name_type_checks };
	%{ $self->{"scanners"} }         = %{ $default_scanners };

	return $self;
}

sub error
{
    my $self = shift;
	return $self->{"error"};
}


# Parses a single line from a configuration file.
# Configuration files look like this:
#
#   # this is a comment
#   # set some variables
#   variable = value
#   variable += value
#   # load another configuration file
#   /path/to/another/configuration/file
# OR:
#   include /path/to/another/configuration/file
#
# All white space in the value is replaced with spaces.   A " #" sequence
# (white space followed by '#') marks the beginning of a comment, and is
# ignored.  
#
# The following escape sequences are expanded in the value string to let
# you get around these "features":
#
#       \\  ->  \
#       \#  ->  #
#       \n  ->  newline
#       \t  ->  tab
#       \s  ->  space
#
# Using the .= or += instead of just = will append the string to the
# variable, instead of resetting it.
#
my $config_recursion = 0;
sub reset_config
{
    my $self = shift;
    $config_recursion = 0;

    foreach my $c (@{ $self->{"defaults"} })
    {
        foreach my $v (keys(%{ $c }))
	    {
	        $self->{"conf"}->{$v} = $c->{$v};
	    }
    }
}
sub configure
{
    my $self = shift;
	my $conf = $self->{"conf"};

    while (my $line = shift)
    {
	    if ($line =~ /^\s*before\s+(\d+)\s+(.*)$/i)
		{
			my ($date, $stuff) = ($1, $2);
			$line = '#'.$line;
			$line = $stuff if ($date > time());
		}
	    if ($line =~ /^\s*after\s+(\d+)\s+(.*)$/i)
		{
		    my ($date, $stuff) = ($1, $2);
			$line = '#'.$line;
			$line = $stuff if ($date <= time());
		}
		
        if ($line =~ /^\s*([a-z0-9_]+)\s*([\+\.]*=)\s*(.*)\s*$/si)
		{
            # OK, this lookes like a variable configuration
            my ($var, $op, $val) = (lc($1), $2, $3);

            unless (($var =~ /^var_/i) || (defined $conf->{$var})) 
            {
                $self->{"error"} .= "Unknown configuration variable: $var\n";
				return 1;
            }

            $val =~ s/\s+#.*$//;
            $val =~ tr/\t\n/  /;
            $val =~ s/\\([\\nts#])/ { $_=$1; tr|\\nts#|\\\012\t #|; $_ } /eg;

            if ($op eq '=')
            {           
                $conf->{$var} = $val;
            }
            else
            {
                $conf->{$var} .= $val;
            }
        }
        elsif ($line !~ /^\s*(#.*)?$/)
        {
            # Ooh, we're supposed to include another configuration file!

            $config_recursion++;
			if ($config_recursion > $conf->{"max_conf_recursion"})
			{
			    $self->{"error"} .= "Configuration files nested too deep!\n";
				$config_recursion--;
				return 2;
			}

            my $fn = $line;
			my $optional = 1 if ($fn =~ s/^\s*(\?|try)\s*//);
			$fn =~ s/^\s*include\s+//i;
            $fn =~ s/^\s*(.*)\s*$/$1/;

            local *CF;
			unless ((-r $fn) && (open (CF, "< $fn")))
			{
				$config_recursion--;
			    next if ($optional);
			    $self->{"error"} .= "Cannot read $fn: $!\n";
				return 3;
			}

            my $ln = 0;
            while (my $cl = <CF>)
            {
                $ln++;
				$cl =~ s/\015?\012$//s;
                if ($self->configure($cl))
                {
                    $config_recursion--;
					$self->{"error"} .= "[$config_recursion] Error in $fn, line $ln.\n";
					$config_recursion--;
					return 4;
                }
            }
            $config_recursion--;
        }
    }
	
	if (0 == $config_recursion)
	{
	    # Side effects of configuration...
		if ($conf->{"feat_trust_pgp"})
		{
            # Only scan headers of signed stuff, if we decide that we
			# trust messages that are signed or encrypted.
			#
			$self->{"parsers"}->{"multipart/signed"} = \&CleanHeaders;
			$self->{"parsers"}->{"multipart/encrypted"} = \&CleanHeaders;
		}

		# Get a bunch of default filenames from /etc/mime.types
		$self->load_mime_filenames($conf->{"system_mime_types"});
	}

    return undef;
}

sub get_config_text
{
    my $self = shift;
	my $conf = $self->{"conf"};

    my $cfg = "";
    for my $key (sort(keys(%{ $conf })))
    {
        my $val = $conf->{$key};
		$key = sprintf("%-20s", $key);
    
        $val =~ s/\012/\\n\n$key += $1/g;
		$val =~ s/\t/\\t/g;
		$val =~ s/\#/\\#/g;
		$val =~ s/\\\\/\\/g;
#       $val =~ s/(.{55})/$1/g;
        $val =~ s/\+=  /+= \\s/g;
		$val =~ s/ \012/\\s\n/g;
		$val =~ s/\012\S+\s+[\+\.]?=\s*$//ms;
    
	    $cfg .= $key ."  = ". $val ."\n";
    }
    return $cfg;
}

sub get_msg
{
    my ($self, $var) = @_;
    return $self->{"conf"}->{"msg_".lc($var)};
}
sub expand_msg
{
    my ($self, $msg) = @_;
    return $self->expand($self->get_msg($msg));
}

sub get_var
{
    my ($self, $var) = @_;
    return $self->{"conf"}->{"var_".lc($var)};
}
sub expand_var
{
    my ($self, $var) = @_;
    return $self->expand($self->get_var($var));
}
sub set_var
{
    my ($self, $var, $value) = @_;
    $self->{"conf"}->{"var_".lc($var)} = $value;
}

sub expand
{
    my ($self, $msg) = @_;
    $msg =~ s/%([A-Z_]+)/ $_=($self->{"conf"}->{"var_".lc($1)} || "%".$1) /ge;
    return $msg;
}

sub sanitize
{
    my ($self, $fh_in, $fh_out) = @_;
	my $conf = $self->{"conf"};

    # Create MIMEStream object
    my $message = Anomy::MIMEStream->New($fh_in, $fh_out, 
	                                     $self->WrappedParsers());

    # Record the common data for use by hooks.
    $self->{"common"} = $message->{"common"};
    $self->{"message"} = $message;

    # Make sure we've already loaded our IO::File module	
    eval 'use '.$conf->{system_io_file}.';';

	# What's our newline preference today?
	if (my $n = $conf->{"feat_newlines"})
	{
	    $message->{"newline_out"} = "\012"     if ($n == 1);
	    $message->{"newline_out"} = "\015\012" if ($n == 2);
	    $message->{"newline_out"} = "\015"     if ($n == 3);
		$message->{"newline_in"}  = undef      if ($n == 4);
		# Default is auto...
	}
		
	# Register this run with the core logger.
	my $start = time();
	$start = 0 if ($conf->{"feat_testing"});
	$self->{"log"}->sublog("Sanitizer", SLOG_TRACE, 
	                       { start => $start }, $message->{"log"});

    # Parse message header
    $message->ParseHeader();

    # Log that we've parsed the header - this creates an event allowing
	# hooks to reconfigure most of the sanitizer based on header data.
	# WARNING: MIMEStream parsers are NOT reconfigurable at this point.
    $message->{"log"}->entry("parsed_header", SLOG_TRACE, undef,
	                         "Finished parsing message header.");

    # Initialize modification ID counter.
    $self->{"mod_id"} = ($$ * 1001) % (10 ** (rand() * 6));

    # Disable randomness when testing
    $self->{"mod_id"} = 99 if ($conf->{"feat_testing"});
	
    # Store initial value for comparisons later.
    $self->{"base_mod_id"} = $self->{"mod_id"};
    $self->{"logd_mod_id"} = $self->{"mod_id"};

    # Append blurb to header.
    {
        my $t;
		chomp $message->{"rawheader"};
		$message->{"rawheader"} .= $self->expand("$t\012") if ($t = $conf->{"header_info"});
		$message->{"rawheader"} .= $self->expand("$t\012") if ($t = $conf->{"header_url"});
		$message->{"rawheader"} .= $self->expand("$t\012") if ($t = $conf->{"header_rev"});
		
		for my $n (1..$conf->{"force_headers"})
		{
		    my $h = $conf->{"force_header_$n"};
		    next unless $h;
		    my ($hdr, $val) = split(/:\s+/, $h);
			unless ($message->{"headers"}->{lc($hdr)})
			{
			    $message->{"rawheader"} .= $self->expand("$h\012");
				$message->{"log"}->entry("force_header", SLOG_WARNING, 
				                         { header => $hdr, value => $val },
				                         "Added default value (%value%) for header %header%");
			}
		}
		
		$message->{"rawheader"} .= "\012";
    }

	# Force message to be multipart/mixed, if it isn't already that or
	# plain text which we know we can append a log to.
	if ((($conf->{"feat_log_after"}) && 
	     ($message->{"mime"}->{"_type"} !~ /^multipart\//i)) ||
		(($conf->{"feat_log_inline"} > 1) && 
	     ($message->{"mime"}->{"_type"} !~ /^(multipart\/mixed|text\/(plain|html))$/i)))
	{
	    $message->{"log"}->entry("forced-multipart", SLOG_WARNING, undef,
		                         "Forcing message to be multipart/mixed, to facilitate logging.");

        my $mt = lc($message->{"mime"}->{"_type"});
        $message->{"parsers"}->{"ORG/$mt"} = $message->{"parsers"}->{"$mt"};
		$message->{"parsers"}->{"$mt"} = sub { return $self->WrapWithMultipart(@_); };
	}

    # Parse everything else!
	$message->ParseBody();

    # Dump log to STDERR if requested, things like that.
    $self->DumpLog($self->{"log"}, undef, 1);

    my $changes = ($self->{"mod_id"} - $self->{"base_mod_id"});
	return 1 if ($changes > 1000000);  # 1000000 is magic ...
	return 1 if (($conf->{"score_bad"}) && ($changes > $conf->{"score_bad"}));
    return 0;
}

sub register_scanner
{
    my $self = shift;
    my $name = shift;
    my $scanner = shift;

    $self->{"scanners"}->{lc($name)} = $scanner;
}

sub load_mime_filenames
{
    my ($self, $mimetypes) = @_;
	
	local *MT;
	return undef unless open(MT, "< $mimetypes");
	while (<MT>)
	{
	    if (/^([a-z]\S+)\s+(\S+)/i)
		{
		    my ($type, $ext) = (lc($1), lc($2));
			$self->{"filenames"}->{$type} = "unnamed.$ext"
			  unless (defined $self->{"filenames"}->{$type});
		}
	}
	close(MT);
	return 1;
}

##[ Helper routines ]##########################################################

sub WrappedParsers
{
    my $self = shift;
	my $wrapped = { };
	
	foreach my $p (keys(%{ $self->{"parsers"} }))
	{
	    my $c = $self->{"parsers"}->{$p}; 
	    $wrapped->{$p} = sub { return &$c($self, @_); };
	}

	return $wrapped;
}


##[ Sanitizers, output, etc. ]#################################################


# This will print out the contents of $log, if the time looks right.
#
# Unfortunately, we can't guarantee that the time will /ever/ be right,
# so this may fail to embed the logs in the message - they are guaranteed
# to go to stderr though.  The logs try to be as unobtrusive as possible.
#
sub DumpLog
{
    my $self = shift;
    my $plog = shift;
	my $conf = $self->{"conf"};
	
    my $part = shift;
    my $finished = shift;
    my $type = $part->{"mime"}->{"_type"} if ($part);

    my $inline = (($conf->{"feat_log_inline"}) && ($part));

    my $alevel = SLOG_WARNING|SLOG_ERROR;
    $alevel = SLOG_ALL if ($conf->{"feat_log_trace"});

	my $astext = $self->{"log"}->print_as_text($alevel);
	$inline = 0 if ($astext =~ /^\s*$/);

    my $ppart;
    if ($inline)
	{
	    if ($ppart = $part->{"parent"})
		{
            $inline = 0 if ($ppart->{"parent"});
			$inline = 0 if ($ppart->{"mime"}->{"_type"} =~ /^(multipart\/mixed|text\/)/i);
			$inline = 0 if ($type !~ /^text\/(plain|html)/i);
			$inline = 0 if ($part->{"sanitizer_dumped_log"});
		}
	}
	
    my $prelog = \$conf->{"msg_log_prefix"};
    my $signature = "";
    $signature = $conf->{"msg_signature"} unless ($conf->{"feat_testing"});

	print STDERR "DumpLog: inline=$inline finished=$finished\n" 
	  if ($ENV{SANITIZER_DEBUG});

    if (($inline || $finished) && 
        ($self->{"logd_mod_id"} != $self->{"mod_id"}))
	{
	    # Record total changes (so far).
		$self->{"logd_mod_id"} = $self->{"mod_id"};
		$plog->entry("modifications", SLOG_INFO, 
		             { base  => $self->{"base_mod_id"},
					   end   => $self->{"mod_id"},
					   # Scores of >1000000 are magic, used by ! policies.
					   total => ($self->{"mod_id"}-$self->{"base_mod_id"}) % 1000000 },
					 "Total modifications so far: %total%");
	}

    if ($inline)
    {
	    $alevel |= SLOG_INFO;
        if ($type =~ /^multipart\/mixed/i) 
        {
		    $part->WriteText("\n") unless ($part->{"Wrote_NL"});
            $part->WriteText(
                $part->{"mime"}->{"_boundpre"}. $part->{"mime"}->{"boundary"} ."\n".
                "Content-Type: ". $conf->{"sanitizer_log_type"} ."\n".
                "Content-Transfer-Encoding: 8bit"."\n".
                "Content-Disposition: ". $conf->{"sanitizer_log_disp"} ."\n");

            if (my $size = $conf->{"feat_log_after"})
			{
				$part->WriteText($self->CreateScratchSpace($part->{"log"}, $size));
			}

            if ($conf->{"feat_log_inline"})
			{
			    $part->WriteText("\n");
			    if ($conf->{"feat_log_xml"})
				{
                    $part->WriteText("<Sanitizer_Log>\n <blurb>\n".$self->expand($$prelog)."\n </blurb>\n");
					$part->WriteText($self->{"log"}->print_as_xml($alevel));
					$part->WriteText($self->expand(" <RevTxt>\n$signature\n </RevTxt>\n</Sanitizer_Log>\n"));
					print STDERR "DumpLog: dumped as XML.\n" 
	  			      if ($ENV{SANITIZER_DEBUG});
			    }
				else
				{
                    $part->WriteText($self->expand($$prelog)."\n");
			        $part->WriteText($self->{"log"}->print_as_text($alevel));
				    $part->WriteText($self->expand("\n$signature\n"));
					print STDERR "DumpLog: dumped as text.\n" 
	  			      if ($ENV{SANITIZER_DEBUG});
				}
			}
			else
			{
			    # No log, we're adding logs after the fact in scratch spaces.
				$part->WriteText("\n\n");
				print STDERR "DumpLog: not dumping logs: no thanks!\n" 
	  			  if ($ENV{SANITIZER_DEBUG});
			}
			$part->{"sanitizer_dumped_log"} = 1;
        }
		elsif (($conf->{"feat_log_inline"} > 1) && ($ppart))
		{
		    # Do nothing, we know a multipart opportunity will show
			# up eventually. :-)
			print STDERR "DumpLog: not dumping logs: expect multipart.\n\n" 
	  		  if ($ENV{SANITIZER_DEBUG});
		}
        elsif ($type =~ /^(text\/plain|application\/pgp)/i)
        {
            $part->WriteText($self->expand("\n-- \n$$prelog\n"));
			$part->WriteText($self->{"log"}->print_as_text($alevel));
			$part->WriteText($self->expand("\n$signature\n"));
			$part->{"sanitizer_dumped_log"} = 1;
			print STDERR "DumpLog: dumped inline as text.\n" 
	  		  if ($ENV{SANITIZER_DEBUG});
        }
        elsif ($type =~ /^text\/html/i) 
        {
            $part->WriteText($self->expand("<BR><HR><TABLE BORDER=1 BGCOLOR=\"white\"><TR><TD><B>$$prelog</B><P>\n"));
			$part->WriteText($self->{"log"}->print_as_html($alevel, "", "black"));
			$part->WriteText($self->expand("<P>$signature<P></TD></TR></TABLE>\n"));
			$part->{"sanitizer_dumped_log"} = 1;
			print STDERR "DumpLog: dumped inline as HTML.\n" 
	  		  if ($ENV{SANITIZER_DEBUG});
        }
		else
		{
			print STDERR "DumpLog: fell through, no space for log!\n" 
	  		  if ($ENV{SANITIZER_DEBUG});
		}
    }
    if (($finished) && ($conf->{"feat_log_stderr"}))
    {
	    $alevel |= SLOG_INFO;
	    if ($conf->{"feat_log_xml"})
		{
		    print STDERR $self->{"log"}->print_as_xml($alevel);
		}
		else
		{
            print STDERR $self->{"log"}->print_as_text($alevel);
		}
    }
}


# This will create a new file based on the "file_name_tpl" template.
# Returns undef on failure.
#
my $caf_inc = 0;
my @charray = ('A'..'Z', 0..9);
sub CreateAttFile
{
    my $self = shift;
	my $conf = $self->{"conf"};
    my $fn = shift;
    my $ofn = shift;
    my $fh = undef;
    my $cnt = 0;

    # This is the file name we use on disk - keep it as simple as possible.
    $ofn = Anomy::MIMEStream::Encode7bit(undef, $ofn);
    $ofn =~ s/[^A-Za-z0-9\.-]/_/gs;
	# Keep it short!
    $ofn =~ s/_+/_/g;
    $ofn =~ s/^.*(.{80})$/$1/;

    do
    {
        my $T = time();
        my ($S, $M, $H, $d, $m, $y, $wd, $yd) = localtime($T);
        $$fn = $conf->{"file_name_tpl"} || return undef;

        # Date stuff
        $$fn =~ s/\$T/ sprintf("%x", $T) /eg;
        $$fn =~ s/\$S/ sprintf("%2.2d", $S) /eg;
        $$fn =~ s/\$M/ sprintf("%2.2d", $M) /eg;
        $$fn =~ s/\$H/ sprintf("%2.2d", $H) /eg;
        $$fn =~ s/\$d/ sprintf("%2.2d", $d) /eg;
        $$fn =~ s/\$m/ sprintf("%2.2d", $m + 1) /eg;
        $$fn =~ s/\$y/ sprintf("%2.2d", $y % 100) /eg;
        $$fn =~ s/\$Y/ sprintf("%d", $y + 1900) /eg;
        $$fn =~ s/\$w/ sprintf("%d", $wd) /eg;
        $$fn =~ s/\$j/ sprintf("%03d", $yd + 1) /eg;

        # PID
        $$fn =~ s/\$P/ sprintf("%x", $$) /eg;
        
        # Safe file name
        $$fn =~ s/\$F/$ofn/g;

        # Random characters
        if ($conf->{"feat_testing"})
        {
            $$fn =~ s/\$/ $charray[ ($caf_inc++ % 35) ] /eg;
        }
        else
        {
            $$fn =~ s/\$/ $charray[ int(rand(35.99)) ] /eg;
        }
        eval '$fh = '.$conf->{system_io_file}.'->new($$fn, O_CREAT|O_EXCL|O_RDWR);';
    }
    while (($cnt++ < 5) && (!defined $fh));

    # FIXME: This is better than the old cryptic error, but we really 
    #        should just return undef and let the caller handle things.
    # Unfortunately, when I tested that it caused unpredictable cleanup
    # behavior if file-name templates were non-unique.
    # 
    die "Failed to create $$fn: $!\n".
        "HINT: Make sure file_name_tpl suggests file names in a directory\n".
        "      which exists and is writable by the Sanitizer.\n" 
      unless (defined ($fh));

    binmode($fh);
    return $fh;
}

# Zero out scan result variables, just in case
sub ResetScanFileVariables
{
    my $self = shift;
	foreach my $k (grep(/^filescan-/i, keys(%{ $self })))
	{
	    $self->{$k} = undef;
	}
}

# Scan a file for viruses, using the given command string.
#
sub ScanFile
{
    my $self = shift;
    my $plog = shift;
    my ($e1, $e2, $e3, $cmd) = split(/:/, shift, 4);
    my $filename = shift;
    my $fh = shift;
    my $md5x2 = shift;  # This is a double MD5 sum of the attachment data.
	my $fnp = shift;

    return 0 unless (defined $cmd);

    my $log = $plog->sublog("ScanFile", SLOG_TRACE, { file => $filename });

    my @args = map {
	                 $_ =~ s/%FILENAME/$filename/gi; 
	                 $_ =~ s/%ATTNAME/$$fnp/gi; 
	                 $_ =~ s/%REPLY_TO/$self->{common}->{"reply-to"}/gi;
	                 $_ =~ s/%ERRORS_TO/$self->{common}->{"errors-to"}/gi;
	                 $_ =~ s/%HEADER\(([a-z_-]+)\)/$self->{common}->{headers}->{$1}/gi;
					 $_ 
				   } split(/\s+/, $cmd);
    my $spid = undef;
    my $sleeps = 0;
	my $result = -9999;

    $log->entry("command", SLOG_TRACE|SLOG_DEBUG, undef, $cmd);
	
    # Use built-in scanner if requested.
    if (lc($args[0]) =~ /^builtin\/?(.*)$/)
    {
        my $cmd = $self->{"scanners"}->{lc($1)};
		my $junk = shift @args;

		$result = (256 * &$cmd($self, $log, $fh, $md5x2, @args)) 
		  if (defined $cmd);
    }
	else
	{
        # Flush buffers, before forking.
		STDOUT->flush();
		STDERR->flush();

        do
		{
            unless (defined ($spid = open(SCANNER, "-|")))
			{
                $log->entry("error", SLOG_ERROR, { text => "$!" }, 
			                "Cannot fork: %text%");

                return 3 if ($sleeps++ > 6);
                sleep(10);
            }
        } until (defined $spid);

        if (!$spid) # Are we the kid?
		{
            print STDOUT "Scan cmd: ", join(' ', @args), "\n";
		
		    # We want the scanner's stderr to be sent to stdout!
            close(STDERR) && 
              open(STDERR, ">&STDOUT") || 
                print STDOUT "WARNING:  Couldn't dup STDOUT!\n";

            STDOUT->flush();

            unless (exec { $args[0] } @args)
            {
                print STDOUT "Exec failed: $!\n";
                die "Exec failed: $!";
            }
        }

        # Not the kid, read the scanner's output.
        while (my $l = <SCANNER>)
        {
		    if ($l =~ /^anomy-(filescan-\S+):\s+(.*)\s*$/i)
			{
			    print STDERR "*** Got $1 = $2\n";
			    $self->{lc($1)} = $2;
			}
            $log->entry("output", SLOG_TRACE|SLOG_DEBUG, undef, $l);
        }
        close (SCANNER);
		$result = $self->{"filescan-result"} || $?;
    }
	my $rs = $self->{"filescan-summary"};
	my $rt = $self->{"filescan-description"};
	
    # Was file clean?
    for my $v (split(/,/, $e1))
    {
        if ($result == (256 * $v))
        {
			$log->entry("result", SLOG_INFO, 
	                    { summary => $rs || "clean", code => $result }, 
     	                $rt || "Scan succeeded, file is clean.");
            return 0;
        }
    }

    # Was file dirty, but is now clean?
    for my $v (split(/,/, $e2))
    {
        if ($result == (256 * $v))
        {
	        $log->entry("result", SLOG_WARNING|SLOG_INFO, 
                        { summary => $rs || "disinfected", code => $result }, 
                        $rt || "File was infected, but the virus checker fixed it.");
            return 1;
        }
    }

    # Was file dirty and unfixable?
    for my $v (split(/,/, $e3))
    {
        if ($result == (256 * $v))
        {
	        $log->entry("result", SLOG_WARNING|SLOG_INFO, 
	                    { summary => $rs || "infected", code => $result }, 
                        $rt || "File was infected, the virus checker couldn't fix it.");
            return 2;
        }
    }

    $log->entry("result", SLOG_WARNING|SLOG_INFO|SLOG_ERROR, 
                { summary => $rs || "error", code => $result },
      	        $rt || "Unknown exit code: %code%");

    return 3;
}


# Clean/scan a file, sanitize the file name.  
#
# This will change the part's reader pointer to point to a virtual on-disk
# part, if a virus scanner is used.
#
sub SanitizeFile
{
    my ($self, $part, $mime) = @_;
	my $conf = $self->{"conf"};

    my $unknown = undef;
    my $fnp = \$unknown;
    my $typep = \$unknown;
    my $minc = 0;
    
    for my $fhn ("name", "filename")
    {
        $fnp = \$mime->{$fhn} if ($mime->{$fhn}); 
    }
    $typep = \$mime->{"_type"} if ($mime->{"_type"}); 
	
    # Don't apply rules to multipart or message parts, or inline
    # parts (e.g. forwarded messages).
    return undef if ($$typep =~ /^m(ultipart|essage)\//i);

    my $log = new Anomy::Log;
	$self->ResetScanFileVariables();
    
    # If no file name is specified, create one from the MIME type.
    if (!defined $$fnp) 
    {
        my $t = $self->{"filenames"}->{lc($$typep)};
        $t = $conf->{"file_default_filename"} if (!defined $t);
        $fnp = \$t if ($t);

        # Abort if the part still has no file name.
        return undef unless ($$fnp);

        # Forcibly add a file name to unnamed parts?
        if (($conf->{"feat_force_name"}) &&
            ($part->{"mime"}->{"_type"}) && 
            ($part->{"mime"}->{"_type"} !~ /^(inline\/)?text\/(plain|html)/i))
        {
            $part->{"mime-headers"}->{"content-type"} .= " name";
            $mime->{"name"} = $t;
            $fnp = \$mime->{"name"};
			$minc = 1 unless ($minc);

            $log->entry("default_name", SLOG_INFO, { default => $t },
                        "No attachment name found, using default (%default%).");
        }
    }

	# Get a list of all possible file names for this attachment
	my @filenames = map { ($_->{"data"}, $_->{"raw"}) } $part->GetMIMEAttributes('(?i)^(file)?name$');
	push @filenames, $$fnp;
    
    # Make sure the list doesn't repeat itself.
	my $l = undef;
     @filenames = grep { ($_ ne $l) && ($l = $_) } sort @filenames;
    my %filenames = map { $_ => 1 } @filenames;

    # Look even harder...
	if ($conf->{"feat_mime_files"})
	{
	    my @mimetypes = map { ($_->{"data"}, $_->{"raw"}) } $part->GetMIMEAttributes('(?i)^_type$');
		foreach my $t (@mimetypes)
		{
		    my $name = $self->{"filenames"}->{lc($t)};
            if ($name && !$filenames{$name})
            {
			    push @filenames, $name;
                $filenames{$name} = 1;
            }
		}
	}

    # And keep looking harder...
	foreach my $h ("_description", "_id")
	{
	    foreach my $v (map { ($_->{"data"}, $_->{"raw"}) } $part->GetMIMEAttributes("(?i)^$h\$"))
		{
		    $v = $1 if ($v =~ /^<+(.*?)>+\s*$/);
            # Skip @foo.com type names, they shouldn't match anyway.
            next if ($v =~ /\@\S+\.com$/i);
            if (($v =~ /\./) && (!$filenames{$v}))
            {
			    push @filenames, $v; 
                $filenames{$v} = 1;
            }
		}
	}

	my $filenames = join(', ', @filenames);

    # Insert our log into part log.
    $part->{"log"}->sublog("SanitizeFile", SLOG_TRACE,
                           { filename => $filenames, 
						     mimetype => $$typep,
						   }, $log);

    # Store original file name & type, initialize other variables.
    my $ofn = $$fnp;
    my $otype = $$typep;
    my $pol = undef;
    my @matched = ( );
    my $fh = undef;
    my $filename = undef;
    my $scanned = 0;

    # Check policies...
	my @rules = (0..$conf->{"file_list_rules"}, "default");
	my $rec = 0;
	my $ip = 1;
    while (($ip > 0) && ($ip < @rules) && ($rec++ < (@rules * 2)))
    {
	    my $i = $rules[$ip]; 
		$ip++;
        next unless (($i eq "default") || ($conf->{"file_list_$i"}));

        $log->entry("Check_Rule", SLOG_TRACE, 
		            { rule => $i, list => $conf->{"file_list_$i"} },
					"Rule %rule%: %list%")
          if ($i ne "default");

        if (($i eq "default") || 
		    (my @fn_match = grep { $_ =~ $conf->{"file_list_$i"} } @filenames))
        {
		    $ofn = $$fnp = $fn_match[0] if (@fn_match && ($ofn eq $$fnp));
		    my %fn_match = ( names => join(', ', @fn_match)) if (@fn_match);
            my $mlog = $log->sublog("Match", SLOG_INFO, 
			                        { rule  => $i, %fn_match });

            push @matched, $i;
            my @policy = split(":", $conf->{"file_default_policy"});

            @policy = split(":", $conf->{"file_list_${i}_policy"})
                if ($conf->{"file_list_${i}_policy"});

            my $scan_result = 0;
            if ((@policy == 1) && (lc($policy[0]) =~ /^drop\!?$\s*/))
            {
                while ($part->Read()) { };
            }
            elsif ((@policy == 4) || (lc($policy[0]) =~ /^save\!?\s*$/))
            {
                # Create a file name from our template.
                my $ofh = $fh;
                if (($fh) || ($fh = $self->CreateAttFile(\$filename, $$fnp)))
                {
				    my $md5_1 = new Digest::MD5;
					my $md5_2 = new Digest::MD5; $md5_2->add("Shift!");
					my $size = 0;

                    if (!$ofh)
                    {
                        # Save the attachment to disk...
                        while (my $l = $part->Read())
                        {
                            # FIXME:  need better error handling!
                            $fh->print($l) || die;
							$md5_1->add($l);
							$md5_2->add($l);
							$size += length($l);
                        }

                        $fh->flush();
                    }
					
					# Log stuff.
					my $dig = $md5_1->hexdigest().$md5_2->hexdigest();
					$mlog->entry("saved-file", SLOG_TRACE, 
					             { file => $filename, digest => $dig, size => $size },
								 "Saved attachment as %file% (%size% bytes, digest %digest%).");

                    # We want to scan this attachment
                    if (@policy == 4)
                    {
                        $scan_result = 
                            $self->ScanFile($mlog,
							                $conf->{"file_list_${i}_scanner"},
											$filename, $fh, $dig, $fnp);
                        $scanned = 1;

						if (my $n = $self->{"filescan-newfile"})
						{
						    print STDERR "*** New file! $n\n";
                            my $nfh;
                            eval '$nfh = new '.$conf->{system_io_file}.' "<$n";';
							if ($nfh)
							{
							    $fh = $nfh;
							    $filename = $n;
								
								$mime->{"_encoding"} = "Base64";
								$part->{"encoder"} = $part->{"encoders"}->{"base64"};;
								
								$minc = 1 unless ($minc);
							}
						}
						if (my $n = $self->{"filescan-newname"})
						{
							$n =~ s,^.*/,,g;
							$$fnp = $n;
							$minc = 1 unless ($minc);
						}
						if (my $t = $self->{"filescan-newtype"})
						{
							$mime->{"_type"} = $t;
							$minc = 1 unless ($minc);
						}
						if (my $enc = $self->{"filescan-newenc"})
						{
							unless ($part->{"encoder"} = $part->{"encoders"}->{lc($enc)})
							{
							    $enc = 'Base64';
								$part->{"encoder"} = $part->{"encoders"}->{lc($enc)};
							}
							$mime->{"_encoding"} = $enc;
							$minc = 1 unless ($minc);
						}
                    }
                }
                else
                {
                    # Error :(	
  	                $mlog->entry("error", SLOG_WARNING|SLOG_INFO|SLOG_ERROR, undef, 
		                         "Failed to create temporary file for scanning attachment!");
                    $scan_result = 3;
                }
            }
			
            $pol = lc($policy[$scan_result]) || "defang";
            $pol =~ s/\s*$//g;

			# Branching is cool.
			$ip = $1 if ($pol =~ s/\^(\d+)$//);

            # 1000000 is magic...
            $minc = 1000001 if ($pol =~ s/\!$//);
			
			my $llev = SLOG_INFO;
			$llev |= SLOG_WARNING if ($pol !~ /accept/i);
	        $mlog->entry("policy", $llev, { name => $pol }, 
	                     "Enforced policy: %name%");

            # Enforce policy, based on scan result.
            if ($pol eq "mangle")
            {
                $minc = 1 unless ($minc);
                $$fnp = $conf->{"msg_blacklisted"} .".". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
                $$typep = "application/". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
                $mime->{"_id"} = "<".$conf->{"msg_defanged"} ."-".$self->{"mod_id"}.">";
            }
            elsif ($pol eq "defang")
            {
                $minc = 1 unless ($minc);
                $$fnp =~ s/\./_/g;
                $$fnp .= ".". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
                $$typep = "application/". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
                $mime->{"_id"} = "<".$conf->{"msg_defanged"} ."-".$self->{"mod_id"}.">";
            }
            elsif ($pol =~ /^(drop|save|panic)$/i)
            {
                $minc = 1 unless ($minc);

                my $what = $1;
                my $fn = $filename;
                $fn =~ s/^.*\///g;

                # Make info available to the replacement template code.
	            $conf->{"var_filename"}    = $$fnp;
				$conf->{"var_savedname"}   = $fn;
				$conf->{"var_virusname"}   = $self->{"filescan-virusname"};
                $conf->{"var_summary"}     = $self->{"filescan-summary"};
                $conf->{"var_description"} = $self->{"filescan-description"};

                # This keeps the file from getting reincluded.
                while ($part->Read()) { };
                $fh = undef;

                # Replace it with our "file deleted" message.
                my $msg = $self->expand_msg("file_". lc($what));
                my $eol = $part->{"EOL"};
                $msg =~ s/\n/$eol/gs;
                $part->UnRead($msg);

                # Fix encoding etc.
                $mime->{"_encoding"} = "8bit";
                $mime->{"charset"} = $conf->{"var_def_charset"};
                $mime->{"_disposition"} = "inline";
                $mime->{"_id"} = "<".$conf->{"msg_defanged"} ."-".$self->{"mod_id"}.">";
                $part->{"encoder"} = $part->{"encoders"}->{"8bit"};
                if ($part->{"uupart"})
                {
                    $part->{"postponed"} = undef;
                    $part->{"rawheader"} = undef;
                    $part->{"uupart"} = 0;
                }
                $$typep = "text/plain";
                $$fnp = $conf->{"msg_defanged"} ."-".$self->{"mod_id"}.".txt";

				# Make sure our modifications are actually used!
				$part->{"mime-headers"}->{"content-type"} = "_type charset";
				$part->{"mime-headers"}->{"content-type"} .= " name" if ($mime->{"name"});
				$part->{"mime-headers"}->{"content-transfer-encoding"} = "_encoding";
				$part->{"mime-headers"}->{"content-disposition"} = "_disposition";
				$part->{"mime-headers"}->{"content-disposition"} .= " name" if ($mime->{"name"});

                # This may keep the file from getting deleted.
                $filename = undef if ($pol eq "save");
            }

            if (defined $fh)
            {
                # Prepare to re-read attachment from disk.
                # I suppose this is bad OO-form.
                $fh->seek(0, SEEK_SET);
				$part->ReadFrom($fh);
            }

            # Keep trying?
            next if ($pol eq "unknown");

            # Keep trying?
            if ($pol eq "warn")
			{
	            $minc = 1 unless ($minc);
	            next;
			}

            # Done!
            last;
        }
    }

	# Cleanup.
	unlink $filename if ($filename);

	# Ensure the file name matches the MIME type
	my $check = undef;
	if (($conf->{"feat_sane_names"}) &&
        ($check = $self->{"name_type_checks"}->{lc($$typep)}) && 
	    ($$fnp !~ $check))
	{
        $log->entry("mime-type_mismatch", SLOG_INFO, 
		            { file => $$fnp, type => $$typep },
					"File name doesn't match MIME type, defanging.");

		$$fnp =~ s/\./_/g;
        $$fnp .= ".". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
        $$typep = "application/". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
	}

    # Remove spooky characters from file name.
	if (my $clist = $conf->{"file_characters"})
	{
        $$fnp =~ s/[^$clist\.-]/_/g;
	}

    # Truncate file name length, by chopping off anything preceding the
    # last 80 characters - the extension matters more than the beginning
    # of the name.
    $minc = 1 if (($$fnp =~ s/^.*(.{80})$/$1/) && (!$minc));	

    # Format the list of rules matched.
    my $matches = join(", ", @matched);
    $matches =~ s/-1/default/;

    if (($ofn) && ($$fnp ne $ofn))
    {
	    $minc = 1 unless ($minc);

        # Mangle x-mac-* headers if present, so Eudora won't work 
        # around the mangling.
        if ($part->{"rawheader"} =~ s/^x-mac/X-$conf->{"msg_defanged"}\[$self->{"mod_id"}\]-MAC/gim)
		{
	        $log->entry("defanged-x-mac", SLOG_WARNING|SLOG_INFO, 
		                { id => $self->{"mod_id"} }, 
	                    "Defanged part's X-Mac headers.");
		}
		
		# Change the description of the part as well...
		if (defined $mime->{"_description"})
		{
		    $mime->{"_description"} = "Renamed from '$ofn' to '$$fnp'";
		}

        # Log change...
	    $log->entry("new-mimetype", SLOG_WARNING|SLOG_INFO, 
		            { id => $self->{"mod_id"}, value => $$typep }, 
	                "Replaced mime type with: %value%")
             if (($otype) && ($otype ne $$typep));
        $log->entry("new-filename", SLOG_WARNING|SLOG_INFO, 
		            { id => $self->{"mod_id"}, value => $$fnp },
	                "Replaced file name with: %value%");

    }

    if ($mime->{"filename"})
    {
        $mime->{"name"} = $mime->{"filename"};
    }
	
	# Increment modification ID.
	$self->{"mod_id"} += $minc;
}


# Truncate just about anything, printing out a blurb at the same
# time.  The fourth argument is a replacement value to use instead
# of the overly-long original one.  Omitting it just truncates the
# field.
#
sub Truncate
{
    my $self = shift;
    my $dataname = shift;
    my $data = shift;
    my $maxlen = shift;
    my $safeval = shift;
    my $log = shift;

    if ($maxlen < length($$data))
    {
        $safeval = substr($$data, 0, $maxlen) unless ($safeval);

        $log->entry("truncated", SLOG_WARNING|SLOG_INFO, 
	                { id   => $self->{"mod_id"}, 
					  what => $dataname,
					  old  => $$data,
					  new  => $safeval,
					  max  => $maxlen },
					"Rewrote long ( >%max% bytes ) %what% from:\n".
					"    >>%old%<<\n".
					" to >>%new%<<");
					
        $self->{"mod_id"}++;
        $$data = $safeval;
    }
}


# This routine will truncate and rewrite message headers, to block 
# buffer-overflow exploits and filename-based trojans.
#
sub SanitizeHeaders
{
    my $self = shift;
	my $conf = $self->{"conf"};

    my $part = shift;
    my $boundfix = shift;
    my $writer = $part->{"writer"} || $part;
    my $headers = $part->{"headers"};
    my $mime = { };
    my $eol = $writer->{"EOL"};
    
    # Copy headers...
    foreach my $key (keys(%{ $part->{"mime"} }))
    {
        $mime->{$key} = $part->{"mime"}->{$key};
    }

    my $old_mod_id = $self->{"mod_id"};
	my $log = $part->{"log"};

    # Header length checks, more strict than the generic tests below.
    if ($conf->{"feat_lengths"})
    {
        $self->Truncate("MIME content-type", \$mime->{"_type"},     80, "application/octet-stream", $log);
        $self->Truncate("MIME charset",      \$mime->{"charset"},  40, $conf->{"var_def_charset"}, $log);
        $self->Truncate("MIME encoding",     \$mime->{"_encoding"}, 40, "8bit", $log);

        # Make sure subject line is of a reasonable length, if it looks 
        # remotely like it contains a file name.
        if (($headers->{"subject"} =~ /\.[A-Za-z0-9]+/) &&
            (length($headers->{"subject"}) > 256))
        {
            my $s = $writer->DecodeHeader($headers->{"subject"});
			chomp $s;
            $s =~ s/[ \t]+/ /g;
            $s =~ s/^.*(.{150,150})$/$1/s;
            $s = $writer->EncodeHeader($s).$eol;
            $writer->{"rawheader"} =~ s/(Subject:.*?)\Q$headers->{"subject"}\E/$1$conf->{"msg_defanged"}\[$self->{"mod_id"}] $s/si;
            
            $s = $headers->{"subject"};
            chomp $s;

            $log->entry("truncated-subject", SLOG_WARNING|SLOG_INFO, 
			            { id => $self->{"mod_id"}, value => $s },
						"Truncated long subject line:\n    >>%value%<<");
			$self->{"mod_id"}++;
        }
        # This is more strict than the following header scan, and is 
        # designed to explicitly defang the Outlook Date: overflow.
        if ($writer->{"rawheader"} =~ s/(Date:\s*[^\012]{60,60})([^\012]+)/$1\012X-$conf->{"msg_defanged"}-Date: [$self->{"mod_id"}] $2/gs)
        {
            $log->entry("split-date", SLOG_WARNING|SLOG_INFO, 
			            { id => $self->{"mod_id"} },
						"Split unusually long Date: header.");
			$self->{"mod_id"}++;
        }
        # This will limit the length of each individual word in the headers
        # to 196 characters, inserting a text marker and spaces when longer
        # words are encountered.  This is designed to foil attacks based on 
        # vulnerabilities like those described in various bugtraq posts 
        # in July 2000, including the USSR-2000050 advisory.
        #
        my $spaces = " " x (1 + rand()*7);
        $spaces = " " if ($conf->{"feat_testing"});
        if ($writer->{"rawheader"} =~ s/(\S{196,196})/$conf->{"msg_defanged"}\[$self->{"mod_id"}]:$1$2 "$spaces" /gs)
        {
            $log->entry("split-words", SLOG_WARNING|SLOG_INFO, 
			            { id => $self->{"mod_id"} },
                        "Split unusually long word(s) in header.");
			$self->{"mod_id"}++;
        }
    }

    # File name sanity checks...
    if ($conf->{"feat_files"})
    {
        $self->SanitizeFile($part, $mime);
    }
    
	# Add missing boundary definitions to headers, if possible.
	if (($conf->{"feat_fixmime"}) && 
	    ($mime->{"_type"} =~ m"^multipart/"i) &&
	    (!defined($mime->{"boundary"})) &&
		(my $bound = $part->GuessBoundary()))
	{
		$mime->{"boundary"} = $bound;
		$mime->{"_boundpre"} = "--";
		$mime->{"undecoded-boundary"} = $bound;
		$part->{"mime-headers"}->{"content-type"} .= " boundary";

        $log->entry("missing-boundary", SLOG_WARNING|SLOG_INFO, 
	                { id  => $self->{"mod_id"},
					  new => $mime->{"boundary"} },
					"MIME boundary missing, guessed: >>%new%<<");
		$self->{"mod_id"}++;
	}
	
	# Replace MIME type if we have exceeded our maximum nesting level.
	if (($self->{"mime_depth"} >= $self->{"conf"}->{"max_mime_depth"}) &&
	    ($mime->{"_type"} =~ m"^multipart/"i))
	{
        $mime->{"_type"} = "application/". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
        $mime->{"boundary"} = undef;

        $log->entry("multipart-depth", SLOG_WARNING|SLOG_INFO, 
	                { id  => $self->{"mod_id"} },
					"Exceeded maximum allowed MIME nesting depth.");
		$self->{"mod_id"}++;
	}
	
    # Replace the boundary string: we know ours is sane! :-)
    if ((($conf->{"feat_boundaries"}) || 
         ($mime->{"boundary"} eq "") ||
         (($conf->{"feat_fixmime"}) && ($mime->{"undecoded-boundary"} =~ /[\(\)]/))) && 
        ($boundfix) && (defined $mime->{"boundary"}))
    {
        $part->{"bad-mime-boundary"} = $mime->{"boundary"};
        if ($conf->{"feat_testing"})
        {
            $mime->{"boundary"} = "MIMEStream=_+testing".$self->{"mod_id"};
        }
        else
        {
            $mime->{"boundary"} = Anomy::MIMEStream::MakeBoundary();
        }
        $mime->{"_boundpre"} = "--";

        $log->entry("replaced-boundary", SLOG_WARNING|SLOG_INFO, 
	                { id  => $self->{"mod_id"},
					  old => $part->{"bad-mime-boundary"},
					  new => $mime->{"boundary"} },
					"Replaced MIME boundary: >>%old%<<\n".
					"                  with: >>%new%<<");
		$self->{"mod_id"}++;
    }

    # Fix encoding of output so it conforms to MIME standard.
    if (($conf->{"feat_fixmime"}) && 
        (!$part->{"uupart"}) &&
        (
		 (
		  ($mime->{"_type"} =~ m"^m(ultipart|essage)/"i) &&
		  ($mime->{"_encoding"} !~ /^([78]bit|binary)$/i)
		 ) || (
		  ($conf->{"feat_log_after"}) && ($mime->{"_type"} =~ m"text/"i) &&
          ($mime->{"_encoding"} !~ /^([78]bit|binary|quoted-printable)$/i)
		 )
	   ))
    {
        if (my $e = $part->{"encoders"}->{ "8bit" })
        {
            $log->entry("fixed-encoding", SLOG_WARNING|SLOG_INFO, 
			            { id => $self->{"mod_id"}, 
						  type => $mime->{"_type"},
						  encoding => $mime->{"_encoding"} },
                        "Fixed invalid/unusable part encoding.");
			$self->{"mod_id"}++;
            $mime->{"_encoding"} = "8bit";
            $part->{"encoder"} = $e;
            $eol = $part->{"EOL"} = $writer->{"EOL"} = $part->{"ENCODED_EOL"};
        }
        else
        {
            $log->entry("error-encoding", SLOG_WARNING|SLOG_INFO|SLOG_ERROR, undef,
                        "Couldn't fix part invalid encoding!");
        }
    }

    # Some stupid windows clients encode binary data using quoted-printable.
    # Try and fix it.
#FIXME
#    if (($conf->{"feat_fixmime"}) && 
#        ($part->{"_encoding"} =~ /quoted/i) &&
#        ($part->{"_filename"} !~ //))
#    {
#        
#    }
	
	if (($conf->{"feat_no_partial"}) &&
	    (lc($mime->{"_type"}) eq "message/partial"))
	{
        $log->entry("defanged-partial", SLOG_WARNING|SLOG_INFO, 
                    { id => $self->{"mod_id"}, 
					  type => $mime->{"_type"},
					  encoding => $mime->{"_encoding"} },
                      "Defanged dangerous message/partial encoding.");
	    $mime->{"_type"} = "application/". $conf->{"msg_defanged"} ."-".$self->{"mod_id"};
		$self->{"mod_id"}++;
	}
	
    # Rebuild the MIME headers with sane/safe values.
    my $newheaders = "";
    for my $header ("Content-Type",
                    "Content-Transfer-Encoding",
                    "Content-Disposition",
					"Content-Description",
					"Content-ID")
    {
        next unless (defined $headers->{lc($header)});

        my @fields = split(/\s+/, $part->{"mime-headers"}->{lc($header)} );
        my $t = undef;

        for my $field (@fields)
        {
            my $value = $mime->{lc($field)};

            $self->Truncate("MIME $field", \$value, 100, undef, $log) 
			  if ($conf->{"feat_lengths"});

            my $oval = $value;
            if ($value =~ /^\s*$/)
            {
				$log->entry("dropped-mime", SLOG_WARNING|SLOG_INFO, 
				            { id => $self->{"mod_id"},
							  field => $field },
							"Dropped empty MIME field: %field%");
				$self->{"mod_id"}++;
            }
            else
            {
                if ((($conf->{"feat_paranoid"}) && 
				     ($value =~ s/[\'\"\`\$]/_/g)) ||
					# Rewrite mime type if it contains illegal chars.
				    (($conf->{"feat_fixmime"}) && 
					 ($field eq "_type") &&
					 ($value =~ s/[^A-Za-z0-9\/\.\+_-]/_/g)))
                {
					$log->entry("rewrote-mime", SLOG_WARNING|SLOG_INFO, 
				                { id    => $self->{"mod_id"},
								  field => $field, 
								  old   => $oval,
								  new   => $value },
								"Rewrote MIME field %field% as\n".
								"    >>%new%<< (was >>%old%<<)");
					$self->{"mod_id"}++;
                }
                
                if ($t)
                {
				    my $v = $part->EncodeHeader($value);
					$v = "\"$v\"" if ($v !~ /^[a-zA-Z0-9]+$/); # Quote?
                    $t .= "; $field=$v";
                }
                else
                {
                    $t = $value;
                }
            }
        }
        $newheaders .= $header .": ". $t ."\012";

        # Modifying the $headers directly here should be okay, since we are
        # inserting equivalent values (not identical) unless new bugs are 
        # found, in which case the raw headers will be updated.
        $headers->{lc($header)} = $t;
    }
	
	# Add scratch space if necessary...
	if ((my $size = $conf->{"feat_log_after"}) && 
	    ($mime->{"_type"} =~ /text\//i))
	{
	    $newheaders .= $self->CreateScratchSpace($log, $size);
	}
	
    # Only modify part header if absolutely necessary.
    if (($conf->{"feat_boundaries"}) || ($self->{"mod_id"} > $old_mod_id))
    {
        $writer = Anomy::MIMEStream->Writer($writer, $mime);
        $writer->KillRawMimeHeaders();
        $writer->{"rawheader"} =~ s/\s*$//s;
        $writer->{"rawheader"} .= "\012" if ($writer->{"rawheader"} ne "");
        if (!$writer->{"parent"})
        {
            $writer->{"rawheader"} .= "MIME-Version: 1.0\012";
            $headers->{"mime-version"} = "1.0\012";
        }
        $writer->{"rawheader"} .= $newheaders . "\012";
    }

	# Check for stupid <CR>-in headers Outlook exploit...
	# Internally we always use <LF>, so this shouldn't be a problem.
	if (($conf->{"feat_fixmime"}) &&
	    ($writer->{"rawheader"} =~ s/\015/ $conf->{"msg_defanged"}\[$self->{"mod_id"}] /gs))
	{
	    $log->entry("cr-in-header", SLOG_WARNING|SLOG_INFO, 
		            { id  => $self->{"mod_id"},
					  url => "http://www.openoffice.nl/special_interest/outlookbug.html" },
					"Bare CR removed from header (re: %url%)");

		$self->{"mod_id"}++;
	}

    # Remove any active HTML from header.
    if ($conf->{"feat_html"})
    {
	    my %html_cfg = ( );
		eval "%html_cfg = (". $conf->{"html_cleaner_header"} .")" 
            if ($conf->{"html_cleaner_header"});
        my $html_cleaner = new Anomy::HTMLCleaner 
		                       { Log => $log, 
							     ModCounter    => \$self->{"mod_id"},
								 DefangString  => $conf->{"msg_defanged"},
							     NoWebBugs     => $conf->{"feat_webbugs"}, 
                                 NoExeLinks    => $conf->{"feat_html_noexe"},
                                 UnkownTagsOK  => $conf->{"feat_html_unknown"},
                                 Paranoid      => $conf->{"feat_html_paranoid"},
								 %html_cfg };
        my $rhl = $html_cleaner->clean(\$writer->{"rawheader"});
        $writer->{"rawheader"} .= $rhl;
    }
}

sub CreateScratchSpace
{
    my ($self, $log, $size) = @_;
	my $sid = sprintf("%8.8x%8.8x", rand()*0xFFFFFFFF, rand()*0xFFFFFFFF);
	$sid = "SCRATCH_".$self->{"mod_id"} if ($self->{conf}->{feat_testing});

	$log->entry("scratch-space", SLOG_WARNING|SLOG_INFO, 
	            { id => $self->{"mod_id"}, size => $size, sid => $sid },
				"Added %size% bytes of scratch space.");

    $self->{"scratch-space"}++;
	$self->{"mod_id"}++;
    return "Content-Junk: $sid ".("X" x $size)."\012";
}

##[ Parsers ]##################################################################

# Wrap a part in a multipart/mixed wrapper.
#
sub WrapWithMultipart
{
    my $self = shift;
	my $conf = $self->{"conf"};

    my $part = shift;
    my $reader = $part;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "WrapWithMultipart");

	# Create a new boundary string
	my $bound = Anomy::MIMEStream::MakeBoundary();
	$bound = "Test_Wrapper_Boundary" if ($conf->{"feat_testing"});

	# Update headers
	my $old = $part->KillRawMimeHeaders();
    $old =~ s/^mime-version:.*?\012//mi;
	
    # Buffer hacks.  This is terribly ugly code.
 	$part->{"EncodeBase64"} = undef;
	my $buf = "--".$bound.$old."\012\012";
	$buf .= &{ $part->{"encoder"} }($part, $part->{"IOBuffer"});
	$buf .= &{ $part->{"encoder"} }($part, undef);
	# Hack to handle internal state in Base64 decoder.
	if ($part->{"DecodeBase64"})
	{
	    $buf .= $part->{"DecodeBase64"};
	}
	$part->{"DecodeBase64"} = undef;
	$part->{"IOBuffer"} = $buf;
    $part->{"EOL"} = $part->{"ENCODED_EOL"};

    # Fix parser table
	my $mt = lc($part->{"mime"}->{"_type"});	
	$part->{"parsers"}->{"$mt"} = $part->{"parsers"}->{"ORG/$mt"};

    # Create writer
    my $w = Anomy::MIMEStream->Writer($part, 
	        { 
			    "boundary" => $bound, 
			    "_boundpre" => "--", 
				"_type"     => "multipart/mixed", 
				"_encoding" => "8bit",
                "_version"  => "1.0",
			},{
                "mime-version"              => "_version",
			    "content-type"              => "_type boundary",
			    "content-transfer-encoding" => "_encoding",
			});

	# Header hack, to save a few cycles.
	$part->{"mime"} = $w->{"mime"};
	$part->{"mime-headers"} = $w->{"mime-headers"};
	undef $part->{"mime-headers"}->{"content-disposition"};

	# Log hack
	$part->{"log"} = $w->{"log"};

    # Encoding hack.
	$w->{"EncodeBase64"} = $part->{"EncodeBase64"} = undef;
	$w->{"DecodeBase64"} = $part->{"DecodeBase64"} = undef;
	$part->{"decoder"} = \&Anomy::MIMEStream::Decode8bit;
	$part->{"encoder"} = \&Anomy::MIMEStream::Encode8bit;

	return $self->CleanMultipart($part);
}

# This sanitizes a text part, and it's headers.
#
sub CleanText
{
    my $self = shift;
	my $conf = $self->{"conf"};
	my $html_cleaner = undef;

    my $part = shift;
    my $reader = $part;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanText");
    my $eol = $part->{"EOL"};

    my $ishtml = 0;
    if (($part->{"mime"}->{"filename"} =~ /html?$/i) ||
        ($part->{"mime"}->{"name"} =~ /html?$/i) ||
        ($part->{"mime"}->{"_type"} =~ /html?$/i))
    {
        $ishtml = 1;
    }

    my $in_trusted_text = 0;
    $in_trusted_text = 100
        if (($conf->{"feat_trust_pgp"}) &&
            ($part->{"mime"}->{"_type"} =~ /(multipart\/signed|application\/pgp)/i));

    $self->SanitizeHeaders($part, 1);
    my $writer = $part->{"writer"} || $part;
    $writer->WriteHeader();

    # Some sanitizations only make sense at the very top of a file. 
    # This determines how many non-blank lines are "near the top".
    my $neartop = 5;

    # Create a HTML cleaner object if needed.
    my $leftovers = undef;
	my %html_cfg = ( );
	eval "%html_cfg = (". $conf->{"html_cleaner_body"} .")" 
        if ($conf->{"html_cleaner_body"});
	$html_cleaner = new Anomy::HTMLCleaner 
	                    { Log => $part->{"log"}, 
					      ModCounter    => \$self->{"mod_id"},
						  DefangString  => $conf->{"msg_defanged"},
					      NoWebBugs     => $conf->{"feat_webbugs"},
                          NoExeLinks    => $conf->{"feat_html_noexe"},
                          UnkownTagsOK  => $conf->{"feat_html_unknown"},
                          Paranoid      => $conf->{"feat_html_paranoid"},
						  %html_cfg }
	  if ($conf->{"feat_html"});

    while (my $l = $reader->Read())
    {
        $l = $leftovers . $l if ($leftovers);

        if ($l =~ /^-+BEGIN.*?(SIGNED|PGP)\s+MESSAGE-+\s*$/smi)
        {
            if ($conf->{"feat_trust_pgp"})
            {
                $in_trusted_text++;

				$part->{"log"}->entry("trust-signed", SLOG_INFO,
				                      { id => $self->{"mod_id"}, 
									    silently => $conf->{"feat_verbose"} },
			                 "Disabled scanning for signed part of message.");

                if ($conf->{"feat_verbose"})
                {
                    $writer->WriteText($self->expand_msg("pgp_warning"));
                }
				$self->{"mod_id"}++;
            }
        }

        if ($in_trusted_text)
        {
            $in_trusted_text-- if ($l =~ /^-+END.*?(SIGNATURE|PGP\s+MESSAGE)-+\s*$/smi);
        }
        # This "else" is safe, since a PGP boundary is harmless.
        else
        {
            # Check for inline forwarded messages.
            if (($conf->{"feat_forwards"}) && 
                ($l =~ s/^(---+.*?Forward.*?---+\s*)$//smi))
            {
                my $fwd = $1;

                # Deal with leftover html snippets by closing them.
                if (($html_cleaner) && ($l !~ /^\s*$/))
                {
                    $l .= $conf->{"msg_defanged"} .".".$self->{"mod_id"}.">$eol";
					$part->{"log"}->entry("closed-html", SLOG_WARNING|SLOG_INFO, 
					                      { id => $self->{"mod_id"} },
			                              "Closed open HTML tag preceding forwarded message.");
                    $self->{"mod_id"}++;
                    $html_cleaner->clean(\$l);
                    $writer->Write($l);
                }

                $writer->Write($fwd);
                $l = $reader->ParserForwardedMessage();
            }
            # Check for inline uuencoded attachments.  
            # Sanitize their contents.
            if (($conf->{"feat_uuencoded"}) && 
                ($l =~ s/^(begin \d* \S+.*)$//smi))
            {
			    my $pre = $1;
				
				# Sanity check on UUencoded data...
				my $lh = $reader->Read();
				$reader->UnRead($lh);
                my ($len, $data) = (1000, "no match");
			    ($len, $data) = (ord($1), $2) if ($lh =~ /^(\S)(.+)$/m);
				$len -= 32;
				$len = int((($len * 4) + 2) / 3);
				if ($len == length($data))
				{
                    $reader->UnRead($pre);
					
					# Deal with leftover html snippets by closing them.
					if (($html_cleaner) && ($l !~ /^\s*$/))
					{
                        $l .= $conf->{"msg_defanged"} .".".$self->{"mod_id"}.">$eol";
						$part->{"log"}->entry("closed-html", SLOG_WARNING|SLOG_INFO,
						                      { id => $self->{"mod_id"} },
											  "Closed open HTML tag preceding uuencoded attachment.");
		  			    $self->{"mod_id"}++;
						$html_cleaner->clean(\$l);
						$writer->Write($l);
					}

					$l = Anomy::MIMEStream::ParserUUAttachment($reader);
				}
				else
				{
				    # Not really a uuencoded line, escape it.
					$pre =~ s/begin /begin_/i;
				    $reader->UnRead($pre);
					
					$part->{"log"}->entry("uu_begin_bug", SLOG_WARNING|SLOG_INFO,
					                      { id => $self->{"mod_id"},
										    url => 'http://www.rodos.net/outlook/#begin' },
										  "Escaped invalid uuencode preamble (%url%)");
					$self->{"mod_id"}++;
				}
            }
            elsif ($neartop)
            {
                # Check for Unix shell scripts.
                if (($conf->{"feat_scripts"}) && 
                    ($l =~ s/^#!/#!\/bin\/sh${eol}echo $conf->{"msg_defanged"}.$self->{"mod_id"}${eol}exit${eol}#!/gsm))
                {
					$part->{"log"}->entry("defanged-shell", SLOG_WARNING|SLOG_INFO,
					                      { id => $self->{"mod_id"} },
			                              "Defanged UNIX shell script(s).");
					$self->{"mod_id"}++;
                }
            }

            # Sanitize embedded HTML - we do this last so our hiding stuff
            # in $leftovers won't disable the other checks.
            $leftovers = $html_cleaner->clean(\$l) if ($conf->{"feat_html"});
        }

        $neartop-- if (($neartop) && ($l !~ /^\s*$/));

        if ((!$in_trusted_text) && ($ishtml) && 
            ($l =~ s/^(.*)(<\/(?:BODY|HTML))/$2/si))
        {
            $writer->Write($1);
            $self->DumpLog($part->{"log"}, $writer);
        }
        $writer->Write($l);
    }
    $writer->Write($leftovers) if ($leftovers);

    # Append log "signature style"
    $self->DumpLog($part->{"log"}, $writer) if (!$in_trusted_text);

    # Flush buffers
    $writer->Write(undef);
}


# This sanitizes the headers of an otherwise unfamiliar part.
#
sub CleanUnknown
{
    my $self = shift;
    my $part = shift;
	my $parser;
    my $guess = $part->GuessMimeType();
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanUnknown");

    if ($guess =~ /text\//i)
    {
        # Why wasn't this marked as text/plain or text/html ?
        return $self->CleanText($part);
    }
    elsif ($guess && ($parser = $part->GetBodyParser($guess)))
	{
		return &$parser($part);
	}
	elsif ($part->{mime}->{"_type"} =~ m,text/,i)
	{
        return $self->CleanText($part);
	}

    return $self->CleanHeaders($part);
}


# This sanitizes only the headers of a part.
#
sub CleanHeaders
{
    my $self = shift;
    my $part = shift;
    
    $self->SanitizeHeaders($part, 0);
    Anomy::MIMEStream::ParserCat($part);

    # BUG: Can't dump logs here, could royally screw up message.
}


# This sanitizes the headers of a message/rfc822 part, before processing
# the part itself with the default handlers.
#
sub CleanRFC822
{
    my $self = shift;
    my $part = shift;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanRFC822");

    $self->SanitizeHeaders($part, 0);
    Anomy::MIMEStream::ParserRFC822($part);

    # BUG: Can't dump logs here, could royally screw up message.
}

# This will attempt to sanitize the first part of a message/partial message
# as if it were message/rfc822.  Subsequent parts are passed to the "unknown"
# cleaner.
sub CleanPartial
{
    my $self = shift;
    my $part = shift;
    $part->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanPartial");

	# FIXME: Here it should be trivial to dump the attachment to a
	#        temporary file using a name based on info from the header,
	# so we could reassemble the message for scanning in it's entirety.
	#
	# COMMENTS:
	#
	#  - Sites which use multiple filtering machines will often never see
	#    all parts present on the same box!  This could be solved by
	#    bouncing all incoming message/partial messages to a dedicated
	#    reassembly box instead of invoking Anomy and delivering.
	#  - The last seen message would have to be replaced with the assembled
	#    one, inline.
	#  - Other messages would be replaced with a "reassembling" message,
	#    Anomy (being a filter) doesn't have enough control to /dev/null 
	#    entire messages.
	#  - It would be foolish to trust only the ID provided by the incoming 
	#    MIME headers.

    return $self->CleanRFC822($part) if ($part->{"mime"}->{"number"} == 1);
    return $self->CleanUnknown($part);
}

# This sanitizes the headers of a multipart part, before processing the
# part itself with the default handlers.
#
sub CleanMultipart
{
    my $self = shift;
    my $reader = shift;

	if ($self->{"mime_depth"} >= $self->{"conf"}->{"max_mime_depth"})
	{
	    # We only parse recursive MIME up to a certain limit...
	    return $self->CleanHeaders($reader);
	}
	
    $reader->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanMultipart");

    $self->SanitizeHeaders($reader, 1);
	$self->{"mime_depth"}++;
    Anomy::MIMEStream::ParserUnclosedMultipart($reader);
    $self->{"mime_depth"}--;

    # Append sanitization log as seperate text part, if this is the end 
    # of the outermost multipart/* part.
    #
    my $writer = $reader->{"writer"} || $reader;
    $self->DumpLog($writer->{"log"}, $writer);

    # Garbage collection & cleanup.
    $writer->Close();
    $writer->Amputate();
}

# This sanitizes the headers of a MS-TNEF part, decodes the MS-TNEF data
# sanitizes the contents as if it were MIME and then puts it all back 
# together again.
#
sub CleanMS_TNEF
{
    my $self = shift;
    my $reader = shift;
	my $conf = $self->{"conf"};
    my $tnef;

    # Use lazy loading of the TNEFStream stuff, since it has alot of
    # dependancies.  If it's not installed and/or dependencies aren't
    # satisfied we just fall back to "unknown".
    eval 'use Anomy::TNEFStream; $tnef = new Anomy::TNEFStream;';
    if ($@)
    {
        $reader->{"log"}->entry("parser", SLOG_TRACE, undef, 
                                "Anomy::TNEFStream not available (probably due to dependancies).");
        return $self->CleanUnknown($reader, @_) if ($@);
    }

    my $printer = $tnef;
    my $writer = $reader->{"writer"};
    $reader->{"log"}->entry("parser", SLOG_TRACE, undef, "CleanMS_TNEF");
    $tnef->Testing(1) if ($conf->{"feat_testing"});

    if ($conf->{"feat_kill_tnef"})
    {
	    # Log change!
	    $reader->{"log"}->entry("TNEF_to_MIME", SLOG_WARNING|SLOG_INFO,
		                        { id => $self->{"mod_id"} },
		                        "Converted Microsoft TNEF encoded data to MIME.");
		$self->{"mod_id"}++;
	
	    # When killing TNEF we want to modify the part's headers and
	    # force the encoding to be 8bit.
		if ($reader->{"mime"}->{"_encoding"} =~ /8bit/i)
		{ 
	        $writer = $reader;
		}
		else
		{
		    my $mime = {
	            "_encoding" => "8bit",
				"boundary" => $tnef->{boundary},
				"_boundpre" => "--",
				"_type" => "multipart/mixed",
	        };
			my $pos = {
			    "content-type" => "_type boundary",
				"content-transfer-encoding" => "_encoding",
			};
			$writer = Anomy::MIMEStream->Writer($reader, $mime, $pos);
		}
		$printer = $writer;

	    # Include old header info
		my $rh = $reader->{"rawheader"};
		chomp $rh;
		$rh =~ s/^([^\s]+)/X-$1/gm;
	
	    $writer->{"rawheader"} = $rh . $writer->{"rawheader"};
    }
    else
    {
        $writer = $reader;
        $self->SanitizeHeaders($writer, 0);
    }

    $writer->WriteHeader();

    my $tnef_reader = sub { return $reader->Read(); };
    my $tnef_writer = sub { return $writer->Write(@_); };
#    my $debug = undef; #sub { print STDERR @_; };
    my $debug = sub { print STDERR @_; };

    my $log = $reader->{"log"}->sublog("Part", SLOG_TRACE, 
	                                   { pos => $reader->{"Read_Bytes"} });

    if (0 == $tnef->parse($tnef_reader, $debug))
    {
        if ($tnef->tnef_to_mime($debug))
		{
	        # Initialize reader/writer functions.
            my $r = $tnef->get_mime_reader();
			my $w = $tnef->get_mime_writer();

            my $subpart = Anomy::MIMEStream->New($tnef, $printer, 
	                                             $writer->{"parsers"});

			$subpart->{"log"} = $log;
			$subpart->{"common"} = $reader->{"common"};

	        # Set up variables needed for parsing (necessary because
			# we can't call ParseHeader).
			$subpart->{"mime"}->{"boundary"} = $tnef->{"boundary"};
			$subpart->{"mime"}->{"_boundpre"} = "--";

            # Parse & sanitize
			$subpart->ParserMultipart($writer);

	        # Reformat to TNEF
            $tnef->mime_to_tnef($debug) unless ($conf->{"feat_kill_tnef"});
		}
    }
    $tnef->dump_tnef($tnef_reader, $tnef_writer, $debug)
      unless ($conf->{"feat_kill_tnef"});

    # Flush.
    $tnef->purge();
    $writer->Write(undef);
}

#EOF#
# vi:ts=4 expandtab
1;
