#!/usr/bin/perl
# Script version (package global); copied into $ScriptVersion, which is shown
# in the usage banner and in the X-Converted-By header added to each message.
$VERSION = '1.03';
# Cyrillic Mail Filter
# Version 1.03
# Part of "Cyrillic Software Suite"
# Get docs and newest version from
# http://www.neystadt.org/cyrillic/
#
# Copyright (c) 1997-98, John Neystadt
# You may install this script on your site for free
# To obtain permission for redistribution or any other usage
# contact john@neystadt.org.
#
# Drop me a line if you deploy this script on your site.
=head1 NAME
cyr-mail-filter.pl v1.03 - Cyrillic Mail Filter to convert email across cyrillic charsets
=cut
use Mail::Internet;
use Mail::Field;
use MIME::Field::ContType;
use MIME::Words;
use Lingua::DetectCharset;
use Convert::Cyrillic;
use Getopt::Std;
# Banner strings (package globals) used by Usage() and by the X-Converted-By
# header written into every processed message.
$ScriptName    = "Cyrillic Mail Filter";
$ScriptVersion = $VERSION;

# Invoked without any command-line arguments: print the help text.
# Usage() terminates the script, so nothing below runs in that case.
Usage () unless @ARGV;
=head1 DESCRIPTION
Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).
Filters email messages and converts the content across cyrillic charsets according to the specified options.
This script addresses the following problems of cyrillization:
=over
=item *
Some people can not read cyrillic and need to get mail transliterated.
=item *
Most mail software does not correctly set the B<charset> tag in headers.
=item *
Many mail readers can't handle cyrillic in the B<Subject> field.
=back
=head1 SYNOPSIS
cyr-mail-filter.pl [B<-s>] B<-f>[E<lt>charsetE<gt>] B<-t>E<lt>charsetE<gt>
=head1 OPTIONS
=over
=item -s
Always transliterate subject
=item -f{AUTO,WIN,KOI8,ISO}
If _F_rom charset is not specified, will use charset from 'Content-Type' header for conversions. Otherwise will ignore the
header. AUTO will detect the charset automatically from email body. WIN, KOI8 or ISO will enforce the source charset
accordingly.
=item -t{VOL,WIN,KOI8,ISO}
Convert text _T_o charset, where charset is one of WIN, KOI8, ISO or VOL (e.g. volapuk or translit).
=back
=cut
# Print the help text to STDERR and terminate the script.
#
# Takes no arguments.  Reads the package globals $ScriptName and
# $ScriptVersion for the banner line and $0 for the program name.
# Never returns: always exits with status 1.
sub Usage {
    # The heredoc starts with an empty line so the banner is separated from
    # whatever was printed to the terminal before it.
    print STDERR <<"END_USAGE";

$ScriptName v$ScriptVersion. Copyright (c) 1998 John Neystadt (http://www.neystadt.org/cyrillic/).
Filters email messages and converts the content across cyrillic charsets according to the specified options.
USAGE: $0 [-s] -f[<charset>] -t<charset>
-s - Always transliterate _S_ubject.
-f{AUTO,WIN,KOI8,ISO} - If _F_rom charset is not specified, will use charset from 'Content-Type' header
for conversions. Otherwise will ignore the header. AUTO will detect the charset
automatically from email body. WIN, KOI8 or ISO will enforce the source charset
accordingly.
-t{VOL,WIN,KOI8,ISO} - Convert text _T_o charset, where charset is one of WIN, KOI8, ISO or
VOL (e.g. volapuk or translit).
END_USAGE
    exit (1);
}
# ---------------------------------------------------------------------------
# Main program.
#
# Parses the command-line switches, reads one mail message from STDIN,
# converts the body lines and the Subject between cyrillic encodings, records
# the original Content-Type in X-Removed-Content-Type, updates Content-Type,
# adds an X-Converted-By header and prints the message.
# Any invalid or missing -f / -t value ends in Usage(), which exits.
# ---------------------------------------------------------------------------
my %opts;

# Only -f and -t carry a value; -s is a plain flag.  Declaring 's' as a
# value-taking switch would make "-s" swallow the argument that follows it.
getopt ('ft', \%opts);

# A bare "-f" (no charset) followed by another switch, as in "-f -tWIN",
# leaves that switch stored as the value of -f.  Give it back to @ARGV, keep
# -f present but empty, and parse the remaining arguments again.
if (defined $opts{'f'} && $opts{'f'} =~ /^-/) {
    unshift @ARGV, $opts{'f'};
    $opts{'f'} = undef;
    getopt ('ft', \%opts);
}

# Plain ternaries instead of "my ... if ...", whose result is undefined when
# the condition is false.
my $paramTranslitSubj = exists $opts{'s'} ? 1 : 0;
my $paramDstEnc       = defined $opts{'t'} ? $opts{'t'} : '';

# -f is mandatory.  With a charset it names the source encoding; without one
# the value 1 means "take the charset from the Content-Type header".
# A missing -f yields '' and is rejected below.
my $paramSrcEnc = exists $opts{'f'} ? ($opts{'f'} || 1) : '';

my $msg    = Mail::Internet->new (\*STDIN);
my $header = $msg->head ();
my $body   = $msg->body ();    # reference to the array of body lines

# Fall back to text/plain when the message carries no Content-Type header.
my $ContentType = Mail::Field->extract ('Content-Type', $header);
$ContentType = Mail::Field->new ('Content-Type', 'text/plain')
    unless defined $ContentType;

# Determine the source encoding.
my $SrcEnc;
if ($paramSrcEnc eq 'AUTO') {
    $SrcEnc = Lingua::DetectCharset::Detect (join (' ', @{$body}));

    # Disable any translations if the email is not in cyrillic
    $paramDstEnc = 'VOL' if $SrcEnc eq 'ENG';
}
elsif ($paramSrcEnc eq 'WIN' || $paramSrcEnc eq 'KOI8' || $paramSrcEnc eq 'ISO') {
    $SrcEnc = $paramSrcEnc;
}
elsif ($paramSrcEnc eq '1') {
    # Map the MIME charset name from the header to an encoding name;
    # anything not in the table is treated as VOL.
    my %Cs2Enc = ('koi8-r' => 'KOI8', 'windows-1251' => 'WIN', 'iso-8859-5' => 'ISO');
    $SrcEnc = $Cs2Enc{$ContentType->charset};
    $SrcEnc = 'VOL' if !$SrcEnc;
}
else {
    Usage ();
}

# Validate the destination encoding and look up its MIME charset name.
# VOL has no charset name of its own and maps to undef.
my %Enc2Cs = ('KOI8' => 'koi8-r', 'WIN' => 'windows-1251', 'ISO' => 'iso-8859-5', 'VOL' => undef);
Usage () unless exists $Enc2Cs{$paramDstEnc};
my $DstEnc     = $paramDstEnc;
my $DstCharset = $Enc2Cs{$DstEnc};

if ($SrcEnc ne 'ENG') {
    # The loop variable aliases each element, so the body is converted in place.
    for my $line (@{$body}) {
        $line = Convert::Cyrillic::cstocs ($SrcEnc, $DstEnc, $line);
    }

    # Convert the Subject only when the message has one, so that no empty
    # Subject header is created for messages that had none.
    my $rawSubject = $header->get ('Subject');
    if (defined $rawSubject) {
        my $Subject = MIME::Words::decode_mimewords ($rawSubject);
        my $subjEnc = $paramTranslitSubj ? 'VOL' : $DstEnc;
        $header->replace ('Subject', Convert::Cyrillic::cstocs ($SrcEnc, $subjEnc, $Subject));
    }
}

# Keep the original Content-Type for reference, then write the updated one.
$header->replace ('X-Removed-Content-Type', $ContentType->stringify);
$ContentType->charset ($DstCharset);
$header->replace ('Content-Type', $ContentType->stringify);
$header->add ('X-Converted-By', "$ScriptName v$ScriptVersion ($SrcEnc -> $DstEnc), get your own from http://www.neystadt.org/cyrillic/");

$msg->print ();
__END__
=head1 EXAMPLES
On UNIX, to create a mail account B<to-win> which automatically translates all incoming mail, you can use the following:
to-win: "|./cyr-mail-filter.pl -fAUTO -tWIN -s | resend my-email@jopa.org"
Another good use is to create a majordomo mailing list that auto-transliterates all incoming mail:
ksp-lat: "|/usr/lib/majordomo/wrapper ./cyr-mail-filter.pl -fAUTO -tVOL -s | ./resend -l ksp-lat ksp-lat-resend"
=head1 PREREQUISITES
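This script requires the C<Mail::Internet>, C<Mail::Field>, C<MIME::Field::ContType>, C<MIME::Words>, C<Lingua::DetectCharset> and C<Convert::Cyrillic>
modules available from CPAN or at http://www.neystadt.org/cyrillic/.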
=pod OSNAMES
any
=cut