#!/usr/bin/env perl
BEGIN { pop @INC if $INC[-1] eq '.' }
use 5.006; # and v7
use warnings;
use strict;

# textmail - mail filter to replace MS Word/HTML attachments with plain text
#
# Copyright (C) 2003-2007, 2011, 2020-2023 raf <raf@raf.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# 20230313 raf <raf@raf.org>

=head1 NAME

I<textmail> - mail filter to replace MS Word/HTML attachments with plain text

=head1 SYNOPSIS

 usage: textmail [options]
 options:

   --help    - Print the help message then exit
   --version - Print the version message then exit
   -h        - Print the help message then exit
   -m        - Print the manpage then exit
   -w        - Print the manpage in HTML format then exit
   -r        - Print the manpage in nroff format then exit
   -M        - Output in mailbox format (mboxrd)
   -T        - Output in raw mail format (for SMTP)
   -W        - Don't replace MS Word attachments with text
   -E        - Don't replace MS Excel attachments with csv
   -H        - Don't replace HTML attachments with text
   -R        - Don't replace RTF attachments with text
   -P        - Don't replace PDF attachments with text
   -U        - Don't translate winmail.dat attachments
   -L        - Don't reduce appledouble attachments
   -I        - Don't delete image attachments
   -A        - Don't delete audio attachments
   -V        - Don't delete video attachments
   -X        - Don't delete MS Windows executable attachments
   -B        - Don't recode text that is base64-encoded
   -S        - Don't replace spaces in filenames with underscores
   -Z        - Do translate signed content (discards signatures)
   -O        - Delete all application/octet-stream attachments
   -!        - Delete all application/* attachments
   -D hdrs   - Delete headers (list of header prefixes and filenames)
   -K types  - Keep attachments (list of mimetypes/exts and filenames)
   -F types  - Save attachments (list of mimetypes/exts and filenames)
   -G path   - Directory to save attachments in (for use with -F)
   -C spec   - Custom attachment translations (mimetype_or_ext:ext:cmd)
   -Y        - Choose plain text alternatives over translated HTML
   -Q spec   - Custom patterns to identify vestigial text alternatives
   -f        - On translation error, keep translation, not original
   -?        - Print paths of helper applications then exit

=head1 DESCRIPTION

I<textmail> filters a mail message or mbox file, replacing MS Word, MS
Excel, HTML, RTF and PDF attachments with the plain text contained therein.
By default, the following attachments are also deleted: image, audio, video,
and MS Windows executables. MS C<winmail.dat> attachments are replaced by
any attachments contained therein which are then replaced by text or deleted
in the same fashion. Any of these actions can be suppressed with command
line options. Mail headers can also be selectively deleted. Attachments can
also be extracted and saved to disk.

This is useful for increasing the accessibility of mail messages (by
reducing their dependence on proprietary file formats), for dramatically
reducing their size (and the time it takes to download them and the time it
takes to read them), and for dramatically reducing the risk of mail-borne
viruses. Its intended use is to reduce the size of a personal email archive
but it could also be useful as a pre-processor for mailing lists. This is
more friendly than a strict "No Attachments" or "No HTML" mailing list
policy.

=head1 OPTIONS

=over 4

=item C<--help>

Print the help message then exit.

=item C<--version>

Print the version message then exit.

=item C<-h>

Print the help message then exit.

=item C<-m>

Print the manpage then exit. This is equivalent to executing C<man textmail>
but this works even when the manpage isn't installed.

=item C<-w>

Print the manpage in HTML format then exit. This lets you install the
manpage in HTML format with a command like:

  mkdir -p /usr/local/share/doc/textmail/html &&
  textmail -w > /usr/local/share/doc/textmail/html/textmail.1.html

=item C<-r>

Print the manpage in nroff format then exit. This lets you install the
manpage with a command like:

  textmail -r > /usr/local/share/man/man1/textmail.1

=item C<-M>

This option causes the output to be in mboxrd format by adding a mailbox
C<From> line at the top if there isn't one already and ensures that there is
a blank line at the bottom of the output. It also performs mailbox quoting
on any lines in the body that look like mailbox C<From> headers. Use this
when the output is to be stored directly in a mailbox file. It is not
necessary when I<textmail> is being used as a mail filter by I<procmail(1)>.

=item C<-T>

This option causes the output to be in raw mail format by removing any
mailbox C<From> line and by not performing mailbox quoting. Use this when
the output is to be sent directly to an SMTP server. It is not necessary
when I<textmail> is being used as a mail filter by I<procmail(1)>.

=item C<-W>

By default, I<textmail> replaces MS Word attachments with inline plain text
attachments that contain just the plain text within the original document.
This option leaves MS Word attachments intact.

=item C<-E>

By default, I<textmail> replaces MS Excel attachments with CSV file
attachments that contain just the data within the original document. This
option leaves MS Excel attachments intact.

=item C<-H>

By default, I<textmail> replaces HTML attachments with inline plain text
attachments that contain just the plain text within the original HTML
attachment. It also replaces text-versus-HTML alternative attachments with
the HTML alternative translated to plain text. This option leaves HTML (and
alternative) attachments intact.

=item C<-R>

By default, I<textmail> replaces RTF attachments with inline plain text
attachments that contain just the plain text within the original document.
This option leaves RTF attachments intact.

=item C<-P>

By default, I<textmail> replaces PDF attachments with inline plain text
attachments that contain just the plain text within the original document.
This option leaves PDF attachments intact.

=item C<-U>

By default, I<textmail> replaces MS TNEF (i.e. C<winmail.dat>) attachments
with the attachments contained therein which are then translated to text as
normal. This option leaves C<winmail.dat> attachments intact. This option,
together with the C<-!> option will cause winmail.dat attachments to be
deleted rather than translated.

=item C<-L>

By default, I<textmail> replaces C<multipart/appledouble> attachments with
just the data fork attachment contained therein which is then translated to
text as normal. This option leaves appledouble attachments intact. However,
the data fork attachment will still be translated as normal resulting in a
probably inappropriate and possibly broken resource fork attachment.
Therefore, this option should probably only be used in conjunction with
other options that suppress the translation of the data fork attachment.

=item C<-I>

By default, I<textmail> deletes image attachments. This option leaves image
attachments intact.

=item C<-A>

By default, I<textmail> deletes audio attachments. This option leaves audio
attachments intact.

=item C<-V>

By default, I<textmail> deletes video attachments. This option leaves video
attachments intact.

=item C<-X>

By default, I<textmail> deletes attachments containing MS Windows
executables. That means attachments with the following filename extensions:
C<com>, C<exe>, C<pif>, C<dll>, C<ocx>, C<scr>, C<vbs>, C<js>, C<bat> and
C<ps1>. This option leaves MS Windows executable attachments intact. To
delete C<zip> files as well, you could use either the C<-O> option or the
C<-!> option.

=item C<-B>

By default, when text is encountered that is C<base64>-encoded, I<textmail>
will recode it as either C<7bit> or C<quoted-printable>, whichever is
appropriate. This option suppresses this recoding. Note that if the text is
large enough and contains a high enough proportion of non-ASCII characters,
it will remain C<base64>-encoded to minimise space.

=item C<-S>

When translating attachments, I<textmail> replaces any bad filename
characters such as space characters with the underscore character. This
option causes underscore characters to subsequently be converted into space
characters. In other words, you can use this option to preserve space
characters in attachment filenames (other bad filename characters will then
be converted to spaces as well).

=item C<-Z>

By default, I<textmail> will not translate C<multipart/signed> attachments.
This option causes C<multipart/signed> attachments to be replaced by the
signed attachment contained therein, discarding the signature control data.
The no-longer-signed data is then translated to text as normal. Note that
C<multipart/encrypted> attachments are never translated.

=item C<-O>

Delete all C<application/octet-stream> attachments, not just MS Windows
executables. Note that this overrides C<-X> but C<-K> overrides this.

=item C<-!>

Delete all C<application/*> attachments. Note that this overrides C<-X> but
C<-K> overrides this. Also note that translated documents are no longer
C<application/*> attachments so they aren't deleted unless their translation
is suppressed with the appropriate command line option.

=item C<-D> I<hdrs>

Delete selected headers. The I<hdrs> argument is a comma-separated list of
header name prefixes and/or the names of files containing header name
prefixes (blank lines, leading or trailing whitespace, and shell-style
comments are ignored). For example, C<textmail -DX-> deletes all headers
whose names begin with C<X->.

=item C<-K> I<types>

By default, I<textmail> deletes several types of non-text attachment. The
C<-O> and C<-!> options delete even more. This option specifies, by mimetype
and/or filename extension, the set of attachments not to delete. This
overrides all deletions.

The I<types> argument is a comma-separated list of mimetypes and/or filename
extensions and/or the names of files containing mimetypes and/or filename
extensions (blank lines, leading or trailing whitespace, and shell-style
comments are ignored). Note that the elements are interpreted as a complete
mimetype, if they contain a slash character, or as either the C<*> in
C<application/*> or as a filename extension if they do not contain a slash
character. For example, C<textmail -Wf!Kdoc,docx> deletes all
C<application/*> attachments except MS Word documents.

=item C<-F> I<types>

This option specifies, by mimetype and/or filename extension, the set of
attachments to save to files on disk. This happens before any translations
or deletions in the email message itself. By default, attachments are saved
to the current directory. The C<-G> option can be used to specify an
alternative directory to save attachments to.

The I<types> argument is a comma-separated list of mimetypes and/or filename
extensions and/or the names of files containing mimetypes and/or filename
extensions (blank lines, leading or trailing whitespace, and shell-style
comments are ignored). Note that the elements are interpreted as a complete
mimetype, if they contain a slash character, or as either the C<*> in
C<application/*> or as a filename extension if they do not contain a slash
character. For example, C<-F doc,docx> saves MS Word documents to the
current directory.

=item C<-G> I<path>

This option specifies the directory to save attachments to when used with
the C<-F> option. Without this option, attachments are saved to the current
directory.

=item C<-C> I<spec>

This option specifies custom translations for attachments with particular
mimetypes or filename extensions.

The I<spec> argument is a comma-separated list of translation specifiers
and/or the names of files containing translation specifiers (blank lines,
leading or trailing whitespace, and shell-style comments are ignored).

Each translation specifier contains two or three colon-separated items: the
mimetype or filename extension of the attachments to translate; the
(optional) filename extension to use for the resulting translated
attachment; and the simple shell command to translate the attachment. The
shell command must read the file specified on the command line, and write
the resulting translated attachment to its standard output. For example:

    -C text/calendar:txt:vcalendar-filter
    -C text/calendar:vcalendar-filter

The mimetypes, filename extensions and shell commands must not contain any
comma or colon characters or C<nul> bytes. If the optional filename
extension to use for the resulting translated attachment is not supplied, it
is assumed to be C<"txt">.

=item C<-Y>

By default, unless the C<-H> option is given, I<textmail> replaces
text-versus-HTML alternative attachments with the HTML alternative
translated to plain text.

Earlier versions of I<textmail> would replace them with just the plain text
alternative. Unfortunately, the plain text alternative is sometimes a
vestigial attachment that isn't a real text alternative of the HTML content.
It is often just a message indicating that you should be reading the HTML
alternative instead. So that's what I<textmail> does by default now.

This option causes I<textmail> to mostly revert to its original behaviour
and replace text-versus-HTML alternative attachments with just the plain
text alternative. However, if I<textmail> can identify the plain text
alternative as a vestigial attachment that isn't a real plain text
alternative, it will translate the HTML alternative instead, so as not to
discard the content of the attachment.

I<textmail> will identify a plain text alternative as vestigial if it is
empty or extremely short, or if it contains any of the following pieces of
text:

    Please view this email in an application that supports HTML
    We have tried to send you this email as HTML
    Your email client does not support HTML email
    Your email client cannot read this email
    This email must be viewed in HTML mode
    Please enable HTML

This option is not recommended. Translating the HTML alternative is probably
always a better choice. Any URLs referred to by the HTML alternative will be
preserved in the translation to plain text. This is usually not the case in
the original plain text alternative. This option is only provided in
accordance with the principle that any feature that can't be turned off is a
bug.

If the C<-H> option is supplied, this option does nothing.

If the above list of patterns is insufficient, the C<-Q> option can be used
as well to include additional patterns to identify vestigial text
alternatives.

=item C<-Q> I<spec>

This option specifies custom patterns for identifying vestigial text
alternatives. It is only useful when the C<-Y> option is used to choose
plain text alternatives over translated HTML alternatives. If the C<-Y>
option is not supplied, this option does nothing.

The I<spec> argument is a comma-separated list of patterns (i.e. a short
piece of text that would appear in a vestigial text alternative), and/or the
names of files containing patterns (blank lines, leading or trailing
whitespace, and shell-style comments are ignored.

=item C<-f>

Whenever I<textmail> is unable to translate any attachment into text, it
will leave the attachment intact. This happens when the requisite
translation software can't be found, when it runs but returns an error code,
and when it produces no output. It also happens when C<winmail.dat>
attachments are corrupt. This option causes the empty translation to take
the place of the original attachment. Only the name of the attachment is
preserved. This is needed to ensure plain text even in the face of an MS
Word document that contains no text (e.g. only images).

=item C<-?>

Print the paths of all helper applications then exit.

=back

=head1 EXAMPLES

A I<procmail(1)> recipe that insists on pure text and no C<X-> headers (with
output in mailbox format):

  :0 fw
  | textmail -Mf!DX-

Do the same but to an existing mailbox file:

  textmail -Mf!DX- < mailbox > mailbox-as-text

Delete all C<application/*> attachments except for PostScript and PDF (and
don't translate PDF into text):

  textmail -!PKps,pdf

Delete all C<application/*> attachments except for zip files and gzipped tar
files:

  textmail -!Ktar.gz,zip

A I<procmail(1)> recipe that just unpacks winmail.dat attachments but
doesn't translate the attachments contained therein into text and doesn't
delete windows executables (with output in mailbox format):

  :0 fw
  | textmail -MWEHRPLIAVXBS

Save MS Word, MS Excel, and PDF attachments in C</tmp> without changing the message:

  textmail -WEHRPULIAVXBS -F doc,docx,xls,xlsx,pdf -G /tmp

Save MS Word, MS Excel, and PDF attachments in C</tmp> without changing the message
(other than translating C<winmail.dat> attachments into standard attachments):

  textmail -WEHRPLIAVXBS -F doc,docx,xls,xlsx,pdf -G /tmp

Replace text/calendar and text/vcard attachments with plain text:

  textmail -WEHRPULIAVXBS -C text/calendar:vcalendar-filter,text/vcard:mutt.vcard.filter

=head1 REQUIREMENTS

Modern MS Word attachments (C<.docx>) are translated into plain text using
I<docx2txt(1)>. If I<textmail> can't find I<docx2txt(1)>, then modern MS
Word attachments are left intact. So make sure that I<docx2txt(1)> is
installed and in the C<$PATH>.

Traditional MS Word and RTF attachments (C<.doc> and C<.rtf>) are translated
into plain text using I<antiword(1)> or I<catdoc(1)>. If I<textmail> can't
find I<antiword(1)> or I<catdoc(1)>, then traditional MS Word and RTF
attachments are left intact. So make sure that I<antiword(1)> and/or
I<catdoc(1)> is installed and in the C<$PATH>.

Modern MS Excel attachments (C<.xlsx>) are translated into csv files using
I<xlsx2csv(1)>. If I<textmail> can't find I<xlsx2csv(1)>, then modern MS
Excel attachments are left intact. So make sure that I<xls2xcsv(1)> is
installed and in the C<$PATH>.

Traditional MS Excel attachments (C<.xls>) are translated into csv files
using I<xls2csv(1)>. If I<textmail> can't find I<xls2csv(1)>, then
traditional MS Excel attachments are left intact. So make sure that
I<xls2csv(1)> is installed and in the C<$PATH>.

HTML attachments are translated into plain text using I<lynx(1)>. If
I<textmail> can't find I<lynx(1)>, then HTML attachments are left intact. So
make sure that I<lynx(1)> is installed and in the C<$PATH>.

PDF attachments are translated into plain text using I<pdftotext(1)>. If
I<textmail> can't find I<pdftotext(1)>, then PDF attachments are left
intact. So make sure that I<pdftotext(1)> is installed and in the C<$PATH>.

I<textmail> also requires I<perl(1)> and I<pod2man(1)> and I<pod2html(1)>
(which come with I<perl(1)>) and I<mktemp(1)>.

If I<textmail> fails to create a temporary directory, or if it is instructed
to do nothing (i.e. C<textmail -WEHRPULIAVXBS>), then it degenerates into
I<cat(1)>.

=head1 CAVEAT

The latest version of I<catdoc>'s I<xls2csv(1)> at the time of writing (i.e.
catdoc-0.93.3) loses data. There are alternatives.

If I<textmail> is unable to create a temporary directory (in C</tmp>), then
it degenerates into I<cat(1)>. Without a temporary directory, no attachments
will be translated or deleted no matter what options (even C<-f>) were given
to I<textmail>. So make sure that C</tmp> is writable. Also make sure that
I<mktemp(1)> is available otherwise an insecure temporary directory will be
created.

=head1 SEE ALSO

I<procmail(1)>,
I<docx2txt(1)>,
I<antiword(1)>,
I<catdoc(1)>,
I<xlsx2csv(1)>,
I<xls2csv(1)>,
I<lynx(1)>,
I<pdftotext(1)>,
I<pod2man(1)>,
I<pod2html(1)>,
I<vcalendar-filter>,
I<mutt.vcard.filter>,
C<https://raf.org/minimail>

=head1 AUTHOR

20230313 raf <raf@raf.org>

=head1 URL

L<https://raf.org/textmail>,
L<https://github.com/raforg/textmail>,
L<https://codeberg.org/raforg/textmail>

=cut

# Functions from minimail: see http://raf.org/minimail/

sub formail # rfc2822 + mboxrd format (see http://www.qmail.org/man/man5/mbox.html)
{
	sub mime # rfc2045, rfc2046
	{
		my ($mail, $parent) = @_;
		return $mail unless exists $mail->{header} && exists $mail->{header}->{'content-type'} || defined $parent && exists $parent->{mime_type} && $parent->{mime_type} =~ /^multipart\/digest$/i;
		my ($content_type) = (exists $mail->{header} && exists $mail->{header}->{'content-type'}) ? @{$mail->{header}->{'content-type'}} : "Content-Type: message/rfc822\n";
		my ($type) = $content_type =~ /^content-type:\s*([\w\/.-]+)/i;
		my $boundary = param($mail, 'content-type', 'boundary') if $type =~ /^multipart\//i;
		return $mail unless defined $type && ($type =~ /^multipart\//i && $boundary || $type =~ /^message\/rfc822$/i);
		($mail->{mime_boundary}) = $boundary =~ /^(.*\S)/ if $boundary;
		$mail->{mime_type} = $type;
		$mail->{mime_message} = mimepart(delete $mail->{body} || '', $mail), return $mail if $type =~ /^message\/(?:rfc822|external-body)$/i;
		return mimeparts($mail, $parent);
	}

	sub mimeparts
	{
		my ($mail, $parent) = @_;
		my $state = 'preamble';
		my $text = '';

		for (split /(?<=\n)/, delete $mail->{body} || '')
		{
			if (/^--\Q$mail->{mime_boundary}\E(--)?/)
			{
				if ($state eq 'preamble')
				{
					$state = 'part';
					$mail->{mime_preamble} = $text if length $text;
				}
				elsif ($state eq 'part')
				{
					$state = 'epilogue' if defined $1 && $1 eq '--';
					push @{$mail->{mime_parts}}, mimepart($text, $mail);
				}

				$text = '', next;
			}

			$text .= $_;
		}

		push @{$mail->{mime_parts}}, mimepart($text, $mail) if $state eq 'part' && length $text;
		$mail->{mime_epilogue} = $text if $state eq 'epilogue' && length $text;
		return $mail;
	}

	sub mimepart
	{
		my ($mail, $parent) = @_;
		my @lines = split /(?<=\n)/, $mail;
		formail(sub { shift @lines }, sub { $mail = shift }, $parent);
		return $mail;
	}

	my ($rd, $act, $parent) = @_;
	my $state = 'header';
	my $mail; my $last;

	while (defined($_ = $rd->()))
	{
		s/\r(?=\n)//g; #, tr/\r/\n/;

		if (!defined $parent && /^From (?:\S+\s+)?\s*[a-zA-Z]+\s+[a-zA-Z]+\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+(?:[A-Z]+\s+)?\d{4}/) # mbox header
		{
			$mail->{body} =~ s/\n\n\z/\n/ if $mail && exists $mail->{mbox} && exists $mail->{body};
			my $mbox = $_; $act->(mime($mail, $parent)) or return if $mail;
			$mail = { mbox => $mbox }, $state = 'header', undef $last, next;
		}

		if ($state eq 'header')
		{
			if (/^([\w-]+):/) # mail header
			{
				push @{$mail->{headers}}, $_;
				push @{$mail->{header}->{$last = lc $1}}, $_;
			}
			elsif (/^$/) # blank line after mail headers
			{
				$mail->{body} = '', $state = 'body';
			}
			else # mail header continuation or error
			{
				${$mail->{headers}}[$#{$mail->{headers}}] .= $_ if defined $last;
				${$mail->{header}->{$last}}[$#{$mail->{header}->{$last}}] .= $_ if defined $last;
			}
		}
		elsif ($state eq 'body')
		{
			s/^>(>*From )/$1/ if exists $mail->{mbox};
			$mail->{body} .= $_;
		}
	}

	$mail->{body} =~ s/\n\n\z/\n/ if $mail && exists $mail->{mbox} && exists $mail->{body};
	$act->(mime($mail, $parent)) if $mail;
}

sub mail2str
{
	my $mail = shift;
	my $head = '';
	$head .= $mail->{mbox} if exists $mail->{mbox};
	$head .= join '', @{$mail->{headers}} if exists $mail->{headers};
	my $body = '';
	$body .= $mail->{body} if exists $mail->{body};
	$body .= "$mail->{mime_preamble}" if exists $mail->{mime_preamble};
	$body .= "--$mail->{mime_boundary}\n" if exists $mail->{mime_boundary} && !exists $mail->{mime_parts};
	$body .= join('', map { "--$mail->{mime_boundary}\n" . mail2str($_) } @{$mail->{mime_parts}}) if exists $mail->{mime_parts};
	$body .= "--$mail->{mime_boundary}--\n" if exists $mail->{mime_boundary};
	$body .= "$mail->{mime_epilogue}" if exists $mail->{mime_epilogue};
	$body .= mail2str($mail->{mime_message}) if exists $mail->{mime_message};
	$body =~ s/^(>*From )/>$1/mg, $body =~ s/([^\n])\n?\z/$1\n\n/ if exists $mail->{mbox};
	return $head . "\n" . $body;
}

my $bchar = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'()+_,-.\/:=?";
sub mail2multipart
{
	my $m = shift;
	return $m if exists $m->{mime_type} && $m->{mime_type} =~ /^multipart\//i;
	my $p = {};
	append_header($p, $_) for grep { /^content-/i } @{$m->{headers}};
	$p->{body} = delete $m->{body} if exists $m->{body};
	$p->{mime_message} = delete $m->{mime_message} if exists $m->{mime_message};
	$p->{mime_type} = $m->{mime_type} if exists $m->{mime_type};
	$m->{mime_type} = 'multipart/mixed';
	$m->{mime_boundary} = exists $m->{mime_prev_boundary} ? delete $m->{mime_prev_boundary} : join '', map { substr $bchar, int(rand(length $bchar)), 1 } 0..30;
	$m->{mime_preamble} = delete $m->{mime_prev_preamble} if exists $m->{mime_prev_preamble};
	$m->{mime_epilogue} = delete $m->{mime_prev_epilogue} if exists $m->{mime_prev_epilogue};
	delete_header($m, qr/content-[^:]*/i);
	append_header($m, 'MIME-Version: 1.0') unless exists $m->{header} && exists $m->{header}->{'mime-version'};
	append_header($m, "Content-Type: $m->{mime_type}; boundary=\"$m->{mime_boundary}\"");
	$m->{mime_parts} = [$p];
	return $m;
}

sub mail2singlepart
{
	my $m = shift;
	$m->{mime_message} = mail2singlepart($m->{mime_message}), return $m if exists $m->{mime_type} && $m->{mime_type} =~ /^message\//i;
	return $m unless exists $m->{mime_type} && $m->{mime_type} =~ /^multipart\//i && @{$m->{mime_parts}} <= 1;
	my $p = shift @{$m->{mime_parts}};
	$m->{mime_prev_boundary} = delete $m->{mime_boundary} if exists $m->{mime_boundary};
	$m->{mime_prev_preamble} = delete $m->{mime_preamble} if exists $m->{mime_preamble};
	$m->{mime_prev_epilogue} = delete $m->{mime_epilogue} if exists $m->{mime_epilogue};
	$m->{body} = $p->{body} if exists $p->{body};
	$m->{mime_message} = $p->{mime_message} if exists $p->{mime_message};
	delete $m->{mime_type}; $m->{mime_type} = $p->{mime_type} if exists $p->{mime_type};
	delete $m->{mime_parts}; $m->{mime_parts} = $p->{mime_parts} if exists $p->{mime_parts};
	$m->{mime_boundary} = $p->{mime_boundary} if exists $p->{mime_boundary};
	$m->{mime_preamble} = $p->{mime_preamble} if exists $p->{mime_preamble};
	$m->{mime_epilogue} = $p->{mime_epilogue} if exists $p->{mime_epilogue};
	my $explicit = 0;
	delete_header($m, qr/content-[^:]*/i);
	append_header($m, $_), ++$explicit for grep { /^content-/i } @{$p->{headers}};
	delete_header($m, 'mime-version') unless $explicit;
	return mail2singlepart($m);
}

sub mail2mbox
{
	my $m = shift;
	return $m if exists $m->{mbox};
	my ($f) = header($m, 'sender');
	($f) = header($m, 'from') unless defined $f;
	$f =~ s/"(?:\\[^\r\n]|[^\\"])*"//g, $f =~ s/\s*;.*//, $f =~s/^[^:]+:\s*//, $f =~ s/\s*,.*$//, $f =~ s/^[^<]*<\s*//, $f =~ s/\s*>.*$// if defined $f;
	$f = 'unknown' unless defined $f;
	use POSIX; $m->{mbox} = "From $f  " . ctime(time());
	return $m;
}

sub append_header
{
	my ($m, $h, $l, $c) = @_;
	$h = header_format($h, $l, $c);
	my ($n) = $h =~ /^([^:]+):/;
	push @{$m->{headers}}, $h;
	push @{$m->{header}->{lc $n}}, $h;
}

sub delete_header
{
	my ($m, $h, $r) = @_;
	return undef unless exists $m->{header};
	@{$m->{headers}} = grep { !/^$h:/i } @{$m->{headers}};
	delete $m->{header}->{$_} for grep { /^$h$/i } keys %{$m->{header}};
	if ($r && exists $m->{mime_parts}) { delete_header($_, $h, $r) for @{$m->{mime_parts}} }
	if ($r && exists $m->{mime_message}) { delete_header($m->{mime_message}, $h, $r) }
}

sub header
{
	my ($m, $h) = @_;
	return () unless exists $m->{header} && exists $m->{header}->{lc $h};
	return map { s/\n\s+/ /g; $_ = header_display($_); /^$h:\s*(.*)\s*$/i; $1 } @{$m->{header}->{lc $h}};
}

my $encword = qr/=\?(us-ascii|iso-8859-\d)(?:\*\w+)?\?(q|b)\?([^? ]+)\?=/i; # encoded words to display (should really only decode ascii)
sub header_display # rfc2047, rfc2231
{
	return join '',
		map { tr/ \t/ /s; $_ } # finally, squeeze multiple whitespace
		map { tr/\x00-\x08\x0b-\x1f\x7f//d; $_ } # strip control characters
		map { s/$encword/lc $2 eq 'q' ? join ' ', split '_', decode_quoted_printable($3), -1 : decode_base64($3)/ieg; $_ } # decode encoded words
		map { s/($encword)\s+($encword)/$1$5/g while /$encword\s+$encword/; $_ } # strip space between encoded words that we're about to decode
		map { s/\((?:\\[^\r\n]|[^\\()])*\)//g unless /^".*"$/; $_ } # strip (comments) outside "quoted strings"
		split /("(?:\\[^\r\n]|[^\\"])*")/, shift; # split on "quoted strings"
}

sub header_format # rfc2822, rfc2047
{
	my ($h, $l, $c) = @_;
	$h =~ s/^\s+//, $h =~ s/\s+$//, $h =~ tr/ \t\n\r/ /s;
	$h = join ' ', map { /^".*"$/ ? $_ : !tr/\x80-\xff// ? $_ : tr/a-zA-Z0-9!*\/+-//c > length >> 1 ? join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? "*$l" : '') . '?b?' . substr(encode_base64($_), 0, -1) . '?=' } (split /\n/, (s/([^\r\n]{38})/$1\n/g, $_))) : join(' ', map { '=?' . ($c || 'iso-8859-1') . ($l ? "*$l" : '') . '?q?' . substr(encode_quoted_printable($_), 0, -2) . '?=' } (split /\n/, (s/([^\r\n]{17})/$1\n/g, $_))) } map { /^[^\s"]*".*"[^\s"]*$/ ? $_ : split / / } split /(\S*"(?:\\[^\r\n]|[^\\"])*"\S*)/, $h;
	my ($f, $p, $lf) = ('', 0); $lf = length $f, $f .= ($lf && $lf + ($lf ? 1 : 0) + length($_) - $p > 78) ? ($p = $lf, "\n") : '', $f .= $f ? ' ' : '', $f .= $_ for map { /^\S*".*"\S*$/ ? $_ : grep { length } split / / } split /(\S*"(?:\\[^\r\n]|[^\\"\r\n])*"\S*)/, $h; # fold
	return $f . "\n";
}

sub param # rfc2231, rfc2045
{
	my ($m, $h, $p) = @_;
	my @p; my $decode = 0;

	for (header($m, $h))
	{
		while (/(\b\Q$p\E(?:\*|\*\d\*?)?)=("(?:\\[^\n]|[^"\n])*"|[^\x00-\x20()<>@,;:\\"\/\[\]?=]+)/ig)
		{
			my ($n, $v) = ($1, $2);
			$v =~ s/^"//, $v =~ s/"$//, $v =~ s/\\(.)/$1/g if $v =~ /^".*"$/;
			$v =~ s/^(?:us-ascii|iso-8859-\d)'\w+'//i and $decode = 1;
			$v =~ s/%([\da-fA-f]{2})/chr hex $1/eg if $decode && substr($n, -1) eq '*';
			push @p, [lc $n, $v];
		}
	}

	return join '', map { $_->[1] } sort { my ($ad) = $a->[0] =~ /(\d+)/; my ($bd) = $b->[0] =~ /(\d+)/; $ad <=> $bd } @p;
}

sub mimetype # rfc2045, rfc2046
{
	my ($m, $p) = @_;
	my ($e) = header($m, 'content-transfer-encoding');
	return 'application/octet-stream' if defined $e && $e !~ /^(?:[78]bit|binary|quoted-printable|base64)$/i;
	my ($type) = header($m, 'content-type');
	return lc $1 if defined $type && $type =~ /^((?:text|image|audio|video|application|message|multipart)\/[^\s;]+)/i;
	return 'message/rfc822' if !defined $type && defined $p && exists $p->{mime_type} && $p->{mime_type} =~ /^multipart\/digest/i;
	return 'text/plain';
}

sub encoding # rfc2045
{
	my $m = shift;
	my ($e) = header($m, 'content-transfer-encoding');
	return (defined $e && $e =~ /^([78]bit|binary|quoted-printable|base64)$/i) ? lc $1 : (exists $m->{body} && $m->{body} =~ tr/\x80-\xff//) ? '8bit' : '7bit';
}

my $filename_counter;
sub filename # rfc2183, rfc2045?
{
	my $p = shift;
	my $fn = param($p, 'content-disposition', 'filename') || param($p, 'content-type', 'name') || 'attachment' . ++$filename_counter;
	$fn =~ s/^.*[\\\/]//, $fn =~ tr/\x00-\x1f !"#\$%&'()*\/:;<=>?@[\\]^`{|}~\x7f/_/s;
	return $fn;
}

sub body
{
	my $m = shift;
	return exists $m->{body} ? decode($m->{body}, encoding($m)) : undef;
}

sub parts
{
	my ($m, $p) = @_;
	return [@{$m->{mime_parts}}] unless defined $p;
	$m->{mime_parts} = [@{$p}];
}

sub newparam # rfc2231, rfc2045
{
	my ($n, $v, $l, $c) = (@_, '', '');
	my $high = $v =~ tr/\x80-\xff//;
	my $ctrl = $v =~ tr/\x00-\x06\x0e-\x1f\x7f//;
	my $enc = $high || $ctrl ? '*' : '';
	$c = ('high' ? 'iso-8859-1' : 'us-ascii') if $enc && !$c;
	$l = 'en' if $c && !$l;
	$v = "$c'$l'$v" if $enc;
	my @p; push @p, $_ while $_ = substr $v, 0, 40, '';
	s/([\x00-\x20\x7f-\xff])/sprintf '%%%02X', ord $1/eg for grep { tr/\x00-\x06\x0e-\x1f\x7f-\xff// } @p;
	s/"/\\"/g, s/^/"/g, s/$/"/g for grep { tr/\x00-\x06\x0e-\x1f\x7f ()<>@,;:\\"\/[]?=// } @p;
	return "; $n$enc=$p[0]" if @p == 1;
	return join '', map { "; $n*$_$enc=$p[$_]" } 0..$#p;
}

my $messageid_counter;
sub newmail # rfc2822, rfc2045, rfc2046, rfc2183 (also rfc3282, rfc3066, rfc2424, rfc2557, rfc2110, rfc3297, rfc2912, rfc2533, rfc1864)
{
	my @a = @_; my %a = @_; my $m = {};
	sub rfc822date { use POSIX; return strftime '%a, %d %b %Y %H:%M:%S +0000', gmtime shift; }
	my $type = $a{type} || (exists $a{parts} ? 'multipart/mixed' : exists $a{message} ? 'message/rfc822' : 'text/plain');
	my $multi = $type =~ /^multipart\//i;
	my $msg = $type =~ /^message\/rfc822$/i;
	if (exists $a{filename} && !exists $a{body} && !exists $a{message} && !exists $a{parts} && -r $a{filename} && stat($a{filename}) && open my $fh, '<', $a{filename})
	{
		$a{body} = do { local $/; my $b = <$fh>; close $fh; $b };
		$a{created} = (exists $a{created}) ? $a{created} : rfc822date((stat _)[9]);
		$a{modified} = (exists $a{modified}) ? $a{modified} : rfc822date((stat _)[9]);
		$a{read} = (exists $a{read}) ? $a{read} : rfc822date((stat _)[8]);
		$a{size} = (stat _)[7];
	}
	($a{filename}) = $a{filename} =~ /([^\\\/]+)$/ if $a{filename};
	my $bound = $multi ? join '', map { substr $bchar, int(rand(length $bchar)), 1 } 0..30 : '';
	my $disp = $a{disposition} || ($type =~ /^(?:text\/|message\/rfc822)/i ? 'inline' : 'attachment');
	my $char = $a{charset} || ($a{body} && $a{body} =~ tr/\x80-\xff// ? 'iso-8859-1' : 'us-ascii');
	my $enc = $a{encoding} || ($multi || $msg ? '7bit' : $a{body} ? choose_encoding($a{body}) : '7bit');
	append_header($m, $a[$_] . ': ' . $a[$_ + 1]) for grep { $_ % 2 == 0 && $a[$_] =~ /^[A-Z]/ } 0..$#a;
	append_header($m, 'Date: ' . rfc822date(time)) if grep { /^(?:date|from|sender|reply-to)$/i } keys %a and !grep { /^date$/i } keys %a;
	append_header($m, 'MIME-Version: 1.0') if grep { /^(?:date|from|sender|reply-to)$/i } keys %a and !grep { /^mime-version$/ } keys %a;
	use Sys::Hostname; append_header($m, "Message-ID: <@{[time]}.$$.@{[++$messageid_counter]}\@@{[hostname]}>") if grep { /^(?:date|from|sender|reply-to)$/i } keys %a and !grep { /^message-id$/i } keys %a;
	append_header($m, "Content-Type: $type" . ($bound ? newparam('boundary', $bound) : '') . ($char =~ /^us-ascii$/i ? '' : newparam('charset', $char))) unless $type =~ /^text\/plain$/i && $char =~ /^us-ascii$/i;
	append_header($m, "Content-Transfer-Encoding: $enc") unless $enc =~ /^7bit$/i;
	append_header($m, "Content-Disposition: $disp" . ($a{filename} ? newparam('filename', $a{filename}) : '') . ($a{size} ? newparam('size', $a{size}) : '') . ($a{created} ? newparam('creation-date', $a{created}) : '') . ($a{modified} ? newparam('modification-date', $a{modified}) : '') . ($a{read} ? newparam('read-date', $a{read}) : '')) if $a{filename} || $a{size} || $a{created} || $a{modified} || $a{read};
	append_header($m, "Content-@{[ucfirst $_]}: $a{$_}") for grep { $a{$_} } qw(description language duration location base features alternative);
	append_header($m, "Content-@{[uc $_]}: $a{$_}") for grep { $a{$_} } qw(id md5);
	($m->{mime_type}, $m->{mime_boundary}, $m->{mime_parts}) = ($type =~ /^\s*([\w\/.-]+)/, $bound, $a{parts} || []) if $multi;
	($m->{mime_type}, $m->{mime_message}) = ($type =~ /^\s*([\w\/.-]+)/, $a{message} || {}) if $msg;
	$m->{body} = encode($a{body} || '', $enc) unless $multi || $msg;
	$m->{mbox} = $a{mbox} if exists $a{mbox} && defined $a{mbox} && length $a{mbox};
	return $m;
}

sub decode
{
	my ($d, $e) = @_;
	return $e =~ /^base64$/i ? decode_base64($d) : $e =~ /^quoted-printable$/i ? decode_quoted_printable($d) : substr($d, 0, -1);
}

sub encode
{
	my ($d, $e) = @_;
	return $e =~ /^base64$/i ? encode_base64($d) : $e =~ /^quoted-printable$/i ? encode_quoted_printable($d) : $d . "\n";
}

sub choose_encoding # rfc2822, rfc2045
{
	my $len = length $_[0];
	my $high = $_[0] =~ tr/\x80-\xff//;
	my $ctrl = $_[0] =~ tr/\x00-\x06\x0e-\x1f\x7f//;
	my ($maxlen, $pos, $next) = (0, 0, 0);

	for (; ($next = index($_[0], "\n", $pos)) != -1; $pos = $next + 1)
	{
		$maxlen = $next - $pos if $next - $pos > $maxlen;
	}

	$maxlen = $len - $pos if $len - $pos > $maxlen;
	return $ctrl ? 'base64' : $high ? $len > 1024 && $high > $len * 0.167 ? 'base64' : 'quoted-printable' : $maxlen > 998 ? 'quoted-printable' : '7bit';
}

sub encode_base64 # MIME::Base64 (Gisle Aas)
{
	pos $_[0] = 0; # Note: Text must be in canonical form (i.e. with "\r\n")
	my $padlen = (3 - length($_[0]) % 3) % 3;
	my $encoded = join '', map { pack('u', $_) =~ /^.(\S*)/ } $_[0] =~ /(.{1,45})/gs;
	$encoded =~ tr{` -_}{AA-Za-z0-9+/};
	$encoded =~ s/.{$padlen}$/'=' x $padlen/e if $padlen;
	$encoded =~ s/(.{1,76})/$1\n/g;
	return $encoded;
}

sub decode_base64 # MIME::Base64 (Gisle Aas)
{
	my $data = shift;
	$data =~ tr{A-Za-z0-9+=/}{}cd;
	$data =~ s/=+$//;
	$data =~ tr{A-Za-z0-9+/}{ -_};
	return join '', map { unpack('u', chr(32 + length($_) * 3 / 4) . $_) } $data =~ /(.{1,60})/gs;
}

sub encode_quoted_printable
{
	my $quoted = shift;
	my $binary = ($quoted =~ tr/\x00-\x06\x0e-\x1f\x7f//) ? '' : '\r\n';
	$quoted =~ s/([^!-<>-~ \t$binary])/sprintf '=%02X', ord $1/eg;
	$quoted =~ s/((?:[^\r\n]{73,75})(?=[=])|(?:[^\r\n]{75}(?=[ \t]))|(?:[^\r\n]{75})(?=[^\r\n]{2})|(?:[^\r\n]{75})(?=[^\r\n]$))/$1=\n/g;
	$quoted =~ s/([ \t])$/sprintf '=%02X', ord $1/emg;
	# Python and mutt both behave as though this is wrong
	#$quoted .= "=\n" unless $quoted =~ /\n$/;
	$quoted .= "\n";
	return $quoted;
}

sub decode_quoted_printable
{
	my $quoted = shift;
	$quoted =~ tr/\x00-\x08\x0b-\x0c\x0e-\x19\x7f-\xff//d;
	$quoted =~ s/=\n//g;
	$quoted =~ s/=([0-9A-Fa-f]{2})/chr hex $1/eg;
	return $quoted;
}

my %mimetype =
(
	txt => 'text/plain', csv => 'text/csv', htm => 'text/html', html => 'text/html', vcf => 'text/vcard', ics => 'text/calendar',
	gif => 'image/gif', jpg => 'image/jpeg', jpeg => 'image/jpeg', jpe => 'image/jpeg', png => 'image/png', bmp => 'image/bmp', tiff => 'image/tiff', tif => 'image/tiff', jp2 => 'image/jp2', jpf => 'image/jpx', jpm => 'image/jpm',
	mp2 => 'audio/mpeg', mp3 => 'audio/mpeg', au => 'audio/au', aif => 'audio/x-aiff', wav => 'audio/wav',
	mpeg => 'video/mpeg', mpg => 'video/mpeg', mpe => 'video/mpeg', qt => 'video/quicktime', mov => 'video/quicktime', avi => 'video/x-msvideo', mj2 => 'video/mj2',
	rtf => 'application/rtf', wri => 'application/vnd.ms-word', pdf => 'application/pdf', ps => 'application/ps', eps => 'application/ps', zip => 'application/zip', other => 'application/octet-stream',
	doc => 'application/msword',
	dot => 'application/msword',
	docx => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
	dotx => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
	docm => 'application/vnd.ms-word.document.macroEnabled.12',
	dotm => 'application/vnd.ms-word.template.macroEnabled.12',
	xls => 'application/vnd.ms-excel',
	xlt => 'application/vnd.ms-excel',
	xla => 'application/vnd.ms-excel',
	xlsx => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
	xltx => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
	xlsm => 'application/vnd.ms-excel.sheet.macroEnabled.12',
	xltm => 'application/vnd.ms-excel.template.macroEnabled.12',
	xlam => 'application/vnd.ms-excel.addin.macroEnabled.12',
	xlsb => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
	ppt => 'application/vnd.ms-powerpoint',
	pot => 'application/vnd.ms-powerpoint',
	pps => 'application/vnd.ms-powerpoint',
	ppa => 'application/vnd.ms-powerpoint',
	pptx => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
	potx => 'application/vnd.openxmlformats-officedocument.presentationml.template',
	ppsx => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
	ppam => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
	pptm => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
	potm => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
	ppsm => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12'
);

my $add_mimetypes;
sub add_mimetypes
{
	return if $add_mimetypes++;
	open my $fh, '<', '/etc/mime.types' or return;

	while (<$fh>)
	{
		s/#.*$//, s/^\s+//, s/\s+$//; next unless $_;
		my ($mimetype, $ext) = /^(\S+)\s+(.*)$/; next unless $ext;
		$mimetype{$_} = $mimetype for split /\s+/, $ext;
	}

	close $fh;
}

sub MESSAGE { 1 }
sub ATTACHMENT { 2 }
sub MESSAGE_CLASS { 0x00078008 }
sub ATTACH_ATTACHMENT { 0x00069005 }
sub ATTACH_DATA { 0x0006800f }
sub ATTACH_FILENAME { 0x00018010 }
sub ATTACH_RENDDATA { 0x00069002 }
sub ATTACH_MODIFIED { 0x00038013 }
my $data; my @attachment; my $attachment; my $pos; my $badtnef;

sub winmail
{
	sub read_message_attribute
	{
		my $type = unpack 'C', substr $data, $pos, 1;
		return 0 unless defined $type && $type == MESSAGE; ++$pos;
		my $id = unpack 'V', substr $data, $pos, 4; $pos += 4;
		my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
		++$badtnef, return 0 if $pos + $len > length $data;
		my $buf = substr $data, $pos, $len; $pos += $len;
		my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
		my $tot = unpack '%16C*', $buf;
		++$badtnef unless $chk == $tot;
		return $chk == $tot;
	}

	sub read_attribute_message_class
	{
		my $type = unpack 'C', substr $data, $pos, 1;
		return unless defined $type && $type == MESSAGE;
		my $id = unpack 'V', substr $data, $pos + 1, 4;
		return unless $id == MESSAGE_CLASS; $pos += 5;
		my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
		++$badtnef, return if $pos + $len > length $data;
		my $buf = substr $data, $pos, $len; $pos += $len;
		my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
		my $tot = unpack '%16C*', $buf;
		++$badtnef unless $chk == $tot;
	}

	sub read_attachment_attribute
	{
		my $type = unpack 'C', substr $data, $pos, 1;
		return 0 unless defined $type && $type == ATTACHMENT; ++$pos;
		my $id = unpack 'V', substr $data, $pos, 4; $pos += 4;
		++$badtnef if $id == ATTACH_RENDDATA && @attachment && !exists $attachment->{body};
		push @attachment, $attachment = {} if $id == ATTACH_RENDDATA;
		my $len = unpack 'V', substr $data, $pos, 4; $pos += 4;
		++$badtnef, return 0 if $pos + $len > length $data;
		my $buf = substr $data, $pos, $len; $pos += $len;
		my $chk = unpack 'v', substr $data, $pos, 2; $pos += 2;
		my $tot = unpack '%16C*', $buf;
		++$badtnef, return 0 unless $chk == $tot;
		$attachment->{body} = $buf, $attachment->{size} = length $buf if $id == ATTACH_DATA;
		$buf =~ s/\x00+$//, $attachment->{filename} = $buf, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_FILENAME && !exists $attachment->{filename};
		my $fname; $attachment->{filename} = $fname, $attachment->{type} = $mimetype{($attachment->{filename} =~ /\.([^.]+)$/) || 'other'} || 'application/octet-stream' if $id == ATTACH_ATTACHMENT && ($fname = realname($buf));
		use POSIX; sub word { unpack 'v', substr($_[0], $_[1] * 2, 2) }
		$attachment->{modified} = strftime '%a, %d %b %Y %H:%M:%S +0000', gmtime mktime word($buf, 5), word($buf, 4), word($buf, 3), word($buf, 2), word($buf, 1) - 1, word($buf, 0) - 1900 if $id == ATTACH_MODIFIED;
		return 1;
	}

	sub realname
	{
		my $buf = shift;
		my $pos = index $buf, "\x1e\x00\x01\x30\x01"; return unless $pos >= 0; $pos += 8;
		my $len = unpack 'V', substr($buf, $pos, 4); $pos += 4;
		my $name = substr($buf, $pos, $len) or return;
		$name =~ s/\x00+$//;
		return $name;
	}

	add_mimetypes();
	my $m = shift;
	$pos = 0; $data = body($m); @attachment = (); $badtnef = 0;
	my $signature = unpack 'V', substr($data, $pos, 4); $pos += 4;
	return $m unless $signature == 0x223E9F78;
	my $key = unpack 'v', substr($data, $pos, 2); $pos += 2;
	my $type = unpack 'C', substr($data, $pos, 1);
	return $m unless $type == MESSAGE || $type == ATTACHMENT;
	do {} while read_message_attribute();
	read_attribute_message_class();
	do {} while read_message_attribute();
	do {} while read_attachment_attribute();
	++$badtnef if @attachment && !exists $attachment->{body};
	return ($badtnef) ? $m : map { newmail(%{$_}) } @attachment;
}

# Documentation functions: usage and manpage (via $PAGER or as nroff or html)

my ($name) = $0 =~ /([^\/]+)$/;
my $version = '1.1';
my $date = '20230313';

$ENV{LANG} = 'C';

sub help
{
	my $rc = shift || 0;

	print
		"usage: $name [options]\n",
		"options:\n",
		"\n",
		"  --help    - Print the help message then exit\n",
		"  --version - Print the version message then exit\n",
		"  -h        - Print the help message then exit\n",
		"  -m        - Print the manpage then exit\n",
		"  -w        - Print the manpage in HTML format then exit\n",
		"  -r        - Print the manpage in nroff format then exit\n",
		"  -M        - Output in mailbox format (mboxrd)\n",
		"  -T        - Output in raw mail format (for SMTP)\n",
		"  -W        - Don't replace MS Word attachments with text\n",
		"  -E        - Don't replace MS Excel attachments with csv\n",
		"  -H        - Don't replace HTML attachments with text\n",
		"  -R        - Don't replace RTF attachments with text\n",
		"  -P        - Don't replace PDF attachments with text\n",
		"  -U        - Don't translate winmail.dat attachments\n",
		"  -L        - Don't reduce appledouble attachments\n",
		"  -I        - Don't delete image attachments\n",
		"  -A        - Don't delete audio attachments\n",
		"  -V        - Don't delete video attachments\n",
		"  -X        - Don't delete MS Windows executable attachments\n",
		"  -B        - Don't recode text that is base64-encoded\n",
		"  -S        - Don't replace spaces in filenames with underscores\n",
		"  -Z        - Do translate signed content (discards signatures)\n",
		"  -O        - Delete all application/octet-stream attachments\n",
		"  -!        - Delete all application/* attachments\n",
		"  -D hdrs   - Delete headers (list of header prefixes and filenames)\n",
		"  -K types  - Keep attachments (list of mimetypes/exts and filenames)\n",
		"  -F types  - Save attachments (list of mimetypes/exts and filenames)\n",
		"  -G path   - Directory to save attachments in (for use with -F)\n",
		"  -C spec   - Custom attachment translations (mimetype_or_ext:ext:cmd)\n",
		"  -Y        - Choose plain text alternatives over translated HTML\n",
  		"  -Q spec   - Custom patterns to identify vestigial text alternatives\n",
		"  -f        - On translation error, keep translation, not original\n",
		"  -?        - Print paths of helper applications then exit\n",
		"\n",
		"Textmail filters a mail message or mbox file, replacing MS Word, MS Excel,\n",
		"HTML, RTF and PDF attachments with the plain text contained therein. By\n",
		"default, the following attachments are also deleted: image, audio, video,\n",
		"and MS Windows executables. MS winmail.dat attachments are replaced by any\n",
		"attachments contained therein which are then replaced by text or deleted in\n",
		"the same fashion. Any of these actions can be suppressed with command line\n",
		"options. Mail headers can also be selectively deleted. Attachments can also\n",
		"be extracted and saved to disk.\n",
		"\n",
		"See the textmail(1) manpage (or textmail -m) for more information.\n",
		"\n",
		"Name: $name\n",
		"Version: $version\n",
		"Date: $date\n",
		"Author: raf <raf\@raf.org>\n",
		"URL: https://raf.org/textmail\n",
		"GIT: https://github.com/raforg/textmail\n",
		"GIT: https://codeberg.org/raforg/textmail\n",
		"\n",
		"Copyright (C) 2003-2007, 2011, 2020-2023 raf <raf\@raf.org>\n",
		"\n",
		"This is free software released under the terms of the GPLv2+:\n",
		"\n",
		"    https://www.gnu.org/licenses/\n",
		"\n",
		"There is no warranty; not even for merchantability or fitness\n",
		"for a particular purpose.\n",
		"\n",
		"Report bugs to raf <raf\@raf.org>\n";

	exit $rc;
}

sub version
{
	print "$name-$version\n";
	exit;
}

sub man
{
	system "pod2man -r '$name-$version' -s 1 -d '$date' -c 'USER COMMANDS' --quotes=none '$0' | nroff -man | " . ($ENV{PAGER} || 'more');
	exit;
}

sub nroff
{
	system "pod2man -r '$name-$version' -s 1 -d '$date' -c 'USER COMMANDS' --quotes=none '$0'";
	exit;
}

sub html
{
	system "pod2html --title '$name(1)' --noindex '$0'";
	unlink glob 'pod2htm*';
	exit;
}

sub HELP_MESSAGE
{
	help;
}

sub VERSION_MESSAGE
{
	return if $ARGV[0] eq '--help'; # This is called before HELP_MESSAGE for some reason
	version;
}

# Parse command line

my %opt;
use Getopt::Std;
help 1 unless getopts('hmrwMTWEHRPLUIAVXBSZO!D:K:F:G:C:YQ:f?', \%opt);
help if exists $opt{h};
man if exists $opt{m};
nroff if exists $opt{r};
html if exists $opt{w};
my $mailbox = exists $opt{M};
my $raw = exists $opt{T};
die "$name: The -M and -T options are incompatible\n" if $mailbox && $raw;
my $docx2txt = find('docx2txt');
my $catdoc = find('catdoc');
my $antiword = find('antiword');
$antiword = $antiword ? $catdoc ? "$antiword\x00$catdoc" : $antiword : $catdoc;
my $xlsx2csv = find('xlsx2csv');
my $xls2csv = find('xls2csv');
my $lynx = find('lynx');
my $pdftotext = find('pdftotext');
my $mktemp = find('mktemp');
paths() if exists $opt{'?'};
my @exe = qw(com exe pif dll ocx scr vbs js bat ps1);
my @image = qw(gif jpg jpeg jpe png bmp tiff tif jp2 jpf jpm);
my @audio = qw(mp2 mp3 au aif wav ogg flac);
my @video = qw(mpeg mpg mpe qt mov avi mj2 flv mp4 mkv m4v divx);
my $force = exists $opt{f};
my $remove_word = ((defined $docx2txt || defined $antiword) || $force) && ! exists $opt{W};
my $remove_excel = ((defined $xlsx2csv || defined $xls2csv) || $force) && ! exists $opt{E};
my $remove_html = (defined $lynx || $force) && ! exists $opt{H};
my $remove_rtf = (defined $catdoc || $force) && ! exists $opt{R};
my $remove_pdf = (defined $pdftotext || $force) && ! exists $opt{P};
my $remove_tnef = ! exists $opt{U};
my $remove_apple = ! exists $opt{L};
my $remove_images = ! exists $opt{I};
my $remove_audio = ! exists $opt{A};
my $remove_video = ! exists $opt{V};
my $remove_exe = ! exists $opt{X};
my $recode_base64_text = ! exists $opt{B};
my $replace_space = ' ' if exists $opt{S};
my $remove_signed = exists $opt{Z};
my $remove_octet = exists $opt{O};
my $remove_application = exists $opt{'!'};
my $remove_headers = exists $opt{D};
my @headers = get_specifiers($opt{D}) if $remove_headers;
my $keep_attachments = exists $opt{K};
my @keep = get_specifiers($opt{K}) if $keep_attachments;
my $save_attachments = exists $opt{F};
my @save = get_specifiers($opt{F}) if $save_attachments;
my $save_dirpath = (exists $opt{G}) ? $opt{G} . '/' : '';
my $remove_custom = exists $opt{C};
my @custom = get_specifiers($opt{C}) if $remove_custom;
my $choose_text_alternative = exists $opt{Y};
my @vestigial = get_specifiers($opt{Q}) if exists $opt{Q};
my $removing = $mailbox || $raw || $remove_word || $remove_excel || $remove_html || $remove_rtf || $remove_pdf || $remove_tnef || $remove_apple || $remove_images || $remove_audio || $remove_video || $remove_exe || $recode_base64_text || $remove_signed || $remove_octet || $remove_application || $remove_headers || $remove_custom;
chop(my $tmp = `$mktemp -dq /tmp/textmail.XXXXXX`) if ($removing || $save_attachments) && defined $mktemp;
if (!($removing || $save_attachments) || (($? || !defined $tmp || ! -d $tmp) && !mkdir($tmp = "/tmp/textmail.$$", 0700)))
{
	exec '/bin/cat' or print STDERR; # suppress warning
	do { local $/; print <> }; # slow cat if exec fails
	exit;
};

# Filter mail message(s) on stdin into text on stdout

formail(sub { <> }, sub
{
	my $m = mail2singlepart(textmail(mail2multipart(shift)));
	delete_header($m, qr/(?:content-length|lines)/i);
	delete $m->{mbox} if $raw;
	print mail2str($mailbox ? mail2mbox($m) : $m);
});

rmdir $tmp or system "rm -rf $tmp";

BEGIN { $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = sub { rmdir $tmp or system "rm -rf $tmp" if defined $tmp; exit 1; } }

# Print paths to helper applications then exit

sub paths
{
	print(defined $docx2txt ? $docx2txt : 'docx2txt not found: Modern MS Word (.docx) will not be translated', "\n");
	print(defined $antiword ? (join ' or ', split /\x00/, $antiword) : 'antiword/catdoc not found: Traditional MS Word (.doc) will not be translated', "\n");
	print(defined $catdoc ? $catdoc : 'catdoc not found: MS RTF will not be translated', "\n");
	print(defined $xlsx2csv ? $xlsx2csv : 'xlsx2csv not found: Modern MS Excel (.xlsx) will not be translated', "\n");
	print(defined $xls2csv ? $xls2csv : 'xls2csv not found: Traditional MS Excel (.xls) will not be translated', "\n");
	print(defined $lynx ? $lynx : 'lynx not found: HTML will not be translated', "\n");
	print(defined $pdftotext ? $pdftotext : 'pdftotext not found: PDF will not be translated', "\n");
	print(defined $mktemp ? $mktemp : 'mktemp not found: insecure temp directory will be used', "\n");
	exit;
}

# Return $part but with non-content-related headers of $entity added to it.
# This is for replacing the content of $entity with the content of $part.

sub inherit_headers
{
	my ($entity, $part) = @_;
	@{$part->{headers}} = (grep(!/^content-/i, @{$entity->{headers}}), grep { /^content-/i } @{$part->{headers}});
	%{$part->{header}} = (map { ($_, $entity->{header}->{$_}) } grep { !/^content-/i } keys %{$entity->{header}}), (map { ($_, $part->{header}->{$_}) } grep { /^content-/i } keys %{$part->{header}});
	$part->{mbox} = $entity->{mbox} if exists $entity->{mbox};
	return $part;
}

# Translate a multipart mail message

sub textmail
{
	my $entity = shift;
	my $isapart = shift || 0; # Unused
	my @parts = @{parts($entity)};

	# Do nothing if this is encrypted (or signed unless -Z)

	return $entity if isa($entity, qr/multipart\/encrypted/i);
	return $entity if !$remove_signed && isa($entity, qr/multipart\/signed/i);

	# Remove headers

	delete_header($entity, qr/(?:@{[join '|', @headers]})[^:]*/i) if $remove_headers;
	delete_header($entity, 'X-MS-TNEF-Correlator') if $remove_tnef;

	# Reduce alternative text-versus-html to the translated html alternative
	# (or the plain text alternative if preferred and the text is not vestigial)

	if ($remove_html && isa($entity, 'multipart/alternative') && @parts == 2)
	{
		if (isa($parts[0], 'text/plain') && isa($parts[1], 'text/html', qr/\.html?$/i) || isa($parts[1], 'text/plain') && isa($parts[0], 'text/html', qr/\.html?$/i))
		{
			my $plain = $parts[isa($parts[0], 'text/plain') ? 0 : 1];
			my $html = $parts[isa($parts[0], 'text/plain') ? 1 : 0];

			if ($choose_text_alternative && !text_is_vestigial($plain))
			{
				return debase64(inherit_headers($entity, $plain));
			}
			else
			{
				return mail2singlepart(textmail(mail2multipart(inherit_headers($entity, $html)), 0));
			}
		}
	}

	# Reduce appledouble attachments to just the data fork attachment

	if ($remove_apple && isa($entity, 'multipart/appledouble') && @parts == 2)
	{
		if (isa($parts[0], 'application/applefile'))
		{
			return mail2singlepart(textmail(mail2multipart(inherit_headers($entity, $parts[1])), 0));
		}
	}

	# Reduce signed attachments to just the signed data attachment

	if ($remove_signed && isa($entity, 'multipart/signed') && @parts == 2)
	{
		if (isa($parts[1], param($entity, 'content-type', 'protocol')))
		{
			return mail2singlepart(textmail(mail2multipart(inherit_headers($entity, $parts[0])), 0));
		}
	}

	# Process parts

	for (my $i = 0; $i < @parts; ++$i)
	{
		# Save attachments

		if ($save_attachments && need_to_save($parts[$i]))
		{
			save_attachment($parts[$i]);
		}

		# Replace arbitrary attachments (via custom translations)

		my $translated = 0;

		for my $spec (@custom)
		{
			my ($mimetype_or_ext, $fmt, $cmd) = $spec =~ /^([^:]+):(?:([^:]+):)?([^:]+)$/;

			if (isa($parts[$i], qr/\Q$mimetype_or_ext\E/i, qr/\.\Q$mimetype_or_ext\E$/i))
			{
				my ($ext) = filename($parts[$i]) =~ /\.([^.]+)$/;
				$parts[$i] = translate($parts[$i], (defined $ext) ? $ext : 'dat', (defined $fmt) ? $fmt : 'txt', $cmd);
				$translated = 1;
				last;
			}
		}

		next if $translated;

		my @builtin = # [enabled, mimetype_re, extension_re, orig_ext_list, new_ext, command]
		(
			# Replace Modern MS Word attachments (.docx) with plain text (via docx2txt)
			[$remove_word, qr/application\/vnd\.openxmlformats-officedocument\.wordprocessingml\.(?:document|template)/i, qr/\.(?:docx|dotx)$/i, 'docx,dotx', 'txt', $docx2txt],
			[$remove_word, qr/application\/vnd\.ms-word\.(?:document|template)\.macroEnabled\.12/i, qr/\.(?:docm|dotm)$/i, 'docm,dotm', 'txt', $docx2txt],
			# Replace Traditional MS Word attachments (.doc) with plain text (via antiword or catdoc)
			[$remove_word, qr/application\/ms-?word/i, qr/\.(?:doc|dot)$/i, 'doc,dot', 'txt', $antiword],
			# Replace Modern MS Excel attachments (.xlsx) with csv (via xlsx2csv)
			[$remove_excel, qr/application\/vnd\.openxmlformats-officedocument\.spreadsheetml\.(?:sheet|template)/i, qr/\.(?:xlsx|xltx)$/i, 'xlsx,xltx', 'csv', $xlsx2csv],
			[$remove_excel, qr/application\/application\/vnd\.ms-excel\.(?:sheet|template|addin)\.macroEnabled\.12/i, qr/\.(?:xlsm|xltm|xlam)$/i, 'xlsm,xltm,xlam', 'csv', $xlsx2csv],
			# Replace Traditional MS Excel attachments (.xls) with csv (via xls2csv)
			[$remove_excel, qr/application\/(?:vnd\.)?ms-?excel/i, qr/\.(?:xls|xlt|xla)$/i, 'xls,xlt,xla', 'csv', $xls2csv],
			# Replace HTML attachments with plain text (via lynx)
			[$remove_html, 'text/html', qr/\.html?$/i, 'html,htm', 'txt', (defined $lynx) ? "$lynx -dump -force_html" : undef],
			# Replace RTF attachments (.rtf) with plain text (via catdoc)
			[$remove_rtf, qr/rtf/i, qr/\.rtf$/i, 'rtf', 'txt', $catdoc],
			# Replace PDF attachments with plain text (via pdftotext)
			[$remove_pdf, qr/pdf/i, qr/\.pdf$/i, 'pdf', 'txt', $pdftotext],
		);

		for my $translation (@builtin)
		{
			my ($enabled, $isa_type, $isa_ext, $exts, $fmt, $cmd) = @{$translation};

			if ($enabled && isa($parts[$i], $isa_type, $isa_ext))
			{
				$parts[$i] = translate($parts[$i], $exts, $fmt, $cmd);
				$translated = 1;
				last;
			}
		}

		next if $translated;

		# Replace TNEF attachments with the attachments contained therein

		if ($remove_tnef && isa($parts[$i], qr/ms-tnef/i, qr/winmail\.dat$/i))
		{
			my @a = winmail($parts[$i]);
			my $failed = @a == 1 && $a[0] == $parts[$i];
			@a = () if $failed && $force;
			splice @parts, $i, 1, @a;
			--$i if !$failed || $force;
			next;
		}

		# Remove images, audio, video, MS Windows executables, octet streams, application/*

		if (!need_to_keep($parts[$i]) &&
			($remove_images && (mimetype($parts[$i]) =~ /^image\// || filename($parts[$i]) =~ /\.(?:@{[join '|', @image]})(?:\?=)?$/i) ||
			$remove_audio && (mimetype($parts[$i]) =~ /^audio\// || filename($parts[$i]) =~ /\.(?:@{[join '|', @video]})(?:\?=)?$/i) ||
			$remove_video && (mimetype($parts[$i]) =~ /^video\// || filename($parts[$i]) =~ /\.(?:@{[join '|', @audio]})(?:\?=)?$/i) ||
			$remove_exe && filename($parts[$i]) =~ /\.(?:@{[join '|', @exe]})(?:\?=)?$/i ||
			$remove_octet && mimetype($parts[$i]) =~ /^application\/octet-stream/ ||
			$remove_application && mimetype($parts[$i]) =~ /^application\//))
		{
			splice @parts, $i--, 1;
			next;
		}

		# Don't use base64 encoding for text

		$parts[$i] = debase64($parts[$i]);

		# Nest

		$parts[$i]->{mime_message} = mail2singlepart(textmail(mail2multipart($parts[$i]->{mime_message}), 1)) if exists $parts[$i]->{mime_message};
		$parts[$i] = textmail($parts[$i], 1) if exists $parts[$i]->{mime_parts};
	}

	# Replace original parts with processed parts

	@parts = grep { exists $_->{mime_type} || exists $_->{body} } @parts;
	parts($entity, \@parts);

	return $entity;
}

# Determine whether or not the given text is a vestigial/useless text
# alternative that should be deleted.

sub text_is_vestigial
{
	my $part = shift;
	my $text = body($part);

	my @patterns =
	(
		'Please view this email in an application that supports HTML',
		'We have tried to send you this email as HTML',
		'Your email client does not support HTML email',
		'Your email client cannot read this email',
		'This email must be viewed in HTML mode',
		'Please enable HTML',
		@vestigial
	);

	return 1 if length($text) < 5;
	return 1 if grep { $text =~ /\Q$_\E/i } @patterns;
	return 0;
}

# Do we need to keep this attachment?

sub need_to_keep
{
	my $entity = shift;

	return match_attachment($entity, @keep);
}

# Do we need to save this attachment?

sub need_to_save
{
	my $entity = shift;

	return match_attachment($entity, @save);
}

sub match_attachment
{
	my $entity = shift;
	my @matches = @_;

	return 0 unless @matches;

	for my $spec (map { quotemeta } @matches)
	{
		return 1 if $spec =~ /\// && mimetype($entity) =~ /^$spec/i;
		return 1 if $spec !~ /\// && mimetype($entity) =~ /^application\/$spec/i;
		return 1 if $spec !~ /\// && filename($entity) =~ /\.$spec(?:\?=)?$/i;
	}

	return 0;
}

# Save an attachment to a new file on disk

sub save_attachment
{
	my $part = shift;
	my $best_filepath = $save_dirpath . filename($part);
	my $filepath = $best_filepath;
	my $has_extension = 1 if $filepath =~ /\../;
	my $max_retries = 1000; # This shouldn't be hard-coded but surely it's enough
	my $suffix = 0;

	# Find an unused name
	my $fh;
	while ($suffix <= $max_retries)
	{
		use Fcntl; sysopen($fh, $filepath, O_WRONLY | O_CREAT | O_EXCL, 0600) and last;
		warn("$name: Failed to open $filepath for writing: $!\n"), return if $suffix++ == $max_retries;
		$filepath = $best_filepath, $filepath =~ s/(\.[^.]+)$/-$suffix$1/ if $has_extension;
		$filepath = "$best_filepath-$suffix" unless $has_extension;
	}

	# Save the attachment
	print { $fh } body($part) or warn("$name: Failed to write to $filepath: $!\n");
	close $fh or warn("$name: Failed to close $filepath: $!\n");

	# Apply any timestamps (I've only ever seen this used in winmail.dat attachments)
	my $modified = parse_rfc822date(param($part, 'content-disposition', 'modification-date'));
	my $read = parse_rfc822date(param($part, 'content-disposition', 'read-date'));
	return unless $modified || $read;
	my ($atime, $mtime) = (stat($filepath))[8, 9];
	utime((defined $read) ? $read : $atime, (defined $modified) ? $modified : $mtime, $filepath);
}

# Parse an RFC822 date as a time_t

sub parse_rfc822date
{
	my $date = shift or return undef;

	my ($day, $month, $year, $hour, $minute, $second, $offset) = $date =~ /^\s*(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat), {1,2}(\d+) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d{4}) {1,2}(\d{1,2}):(\d{2}):(\d{2})(?: ([+-]\d{4}))?\s*$/;
	my %month = (Jan => 0, Feb => 1, Mar => 2, Apr => 3, May => 4, Jun => 5, Jul => 6, Aug => 7, Sep => 8, Oct => 9, Nov => 10, Dec => 11);
	$offset = '' unless defined $offset;
	$offset =~ s/(\d{2})(\d{2})/$1:$2/; # Insert ":" between hh and mm for $TZ
	$offset =~ s/^([+-])/($1 eq '-') ? '+' : '-'/e; # Reverse direction for $TZ
	my $time_t = do { local($ENV{TZ}) = 'UTC' . $offset; use POSIX; tzset; mktime($second, $minute, $hour, $day, $month{$month}, $year - 1900) };
	tzset;
	return $time_t;
}

# Check if a part is of the desired type or has the desired filename extension

sub isa
{
	my $entity = shift;
	my $type = shift;
	my $ext = shift;

	return
		mimetype($entity) =~ /$type/i ||
		mimetype($entity) =~ /^application\/$type/i ||
		defined $ext && filename($entity) =~ /$ext/i;
}

# Return a translated part

sub translate
{
	my $part = shift;
	my @ext = split /,/, shift;
	my $fmt = shift;
	my $cmd = shift;

	return $part if !defined $cmd && !$force;
	my $origpath = filename($part);
	$origpath =~ s/_+/$replace_space/g if defined $replace_space;
	$origpath .= '.' . $ext[0] unless $origpath =~ /\.(?:@{[join '|', @ext, $fmt]})$/i;
	my $textpath = $origpath;
	$textpath =~ s/\.(?:@{[join '|', @ext]})$/.$fmt/i;
	$textpath .= ".$fmt" if $textpath eq $origpath && $textpath !~ /\.$fmt$/i;
	return newmail(filename => $textpath, body => '') if !defined $cmd && $force;
	my $origdata = body($part);
	if (open my $fh, '>', "$tmp/$origpath") { print { $fh } $origdata; close $fh }
	my $failed; $failed = $origpath ne $textpath && system($_ . ' ' . quotemeta("$tmp/$origpath") . ' > ' . quotemeta("$tmp/$textpath")) || -s "$tmp/$origpath" && -z "$tmp/$textpath" or last for split /\x00/, $cmd;
	unlink "$tmp/$origpath" unless $origpath eq $textpath;
	unlink("$tmp/$textpath"), return $part if $failed && !$force;
	my $charset = param($part, 'content-type', 'charset');
	my $created = param($part, 'content-disposition', 'creation-date');
	my $modified = param($part, 'content-disposition', 'modification-date');
	my $read = param($part, 'content-disposition', 'read-date');
	$part = newmail(filename => "$tmp/$textpath", ($charset ? (charset => $charset) : ()), created => $created || undef, modified => $modified || undef, read => $read || undef);
	unlink "$tmp/$textpath";
	return $part;
}

# Recode base64-encoded text as 7bit or quoted-printable

sub debase64
{
	my $entity = shift;
	return $entity unless $recode_base64_text;
	my $type = mimetype($entity);
	return $entity unless $type =~ /^text\//i && encoding($entity) =~ /^base64$/i;
	my $body = body($entity); $body =~ tr/\r//d;
	my $filename = param($entity, 'content-disposition', 'filename') || param($entity, 'content-type', 'name');
	my ($disposition) = header($entity, 'content-disposition');
	$disposition =~ s/;.*// if defined $disposition;
	my $size = param($entity, 'content-disposition', 'size');
	my $created = param($entity, 'content-disposition', 'creation-date');
	my $modified = param($entity, 'content-disposition', 'modification-date');
	my $read = param($entity, 'content-disposition', 'read-date');
	my $charset = param($entity, 'content-type', 'charset');
	my ($description) = header($entity, 'content-description');
	my ($language) = header($entity, 'content-language');
	my ($duration) = header($entity, 'content-duration');
	my ($location) = header($entity, 'content-location');
	my ($base) = header($entity, 'content-base');
	my ($features) = header($entity, 'content-features');
	my ($alternative) = header($entity, 'content-alternative');
	my ($id) = header($entity, 'content-id');
	my ($md5) = header($entity, 'content-md5');
	my $mbox = $entity->{mbox} if exists $entity->{mbox};
	return newmail(
		type => $type,
		body => $body,
		(defined $filename ? (filename => $filename) : ()),
		(defined $disposition ? (disposition => $disposition) : ()),
		(defined $charset ? (charset => $charset) : ()),
		(defined $size ? (size => $size) : ()),
		(defined $created ? (created => $created) : ()),
		(defined $modified ? (modified => $modified) : ()),
		(defined $read ? (read => $read) : ()),
		(defined $description ? (description => $description) : ()),
		(defined $language ? (language => $language) : ()),
		(defined $duration ? (duration => $duration) : ()),
		(defined $location ? (location => $location) : ()),
		(defined $base ? (base => $base) : ()),
		(defined $features ? (features => $features) : ()),
		(defined $alternative ? (alternative => $alternative) : ()),
		(defined $id ? (id => $id) : ()),
		(defined $md5 ? (md5 => $md5) : ()),
		(defined $mbox ? (mbox => $mbox) : ())
	);
}

# Obtain specifiers from a comma-separated list of specifiers and/or the
# names of files containing specifiers, one per line (blank lines, leading
# or trailing whitespace, and shell-style comments are ignored).

sub get_specifiers
{
	my $spec = shift;
	my @list;

	for my $pat (split /\s*,+\s*/, $spec)
	{
		if (-r $pat)
		{
			open my $fh, '<', $pat or next;

			while (<$fh>)
			{
				s/#.*$//, s/^\s+//, s/\s+$//; next unless $_;
				push @list, $_;
			}

			close $fh;
		}
		else
		{
			push @list, $pat;
		}
	}

	return @list;
}

# Locate a command in the $PATH

sub find
{
	my $cmd = shift;
	return $_ for (grep { -x $_ } map { "$_/$cmd" } split /:/, $ENV{PATH});
	return undef;
}

# vi:set ts=4 sw=4:
