RandomSec/extensions/jython/module/MOD-INF/lib/jython/mimify.py
Stefano Mazzocchi ba60780fa3 avoid the double naming problem in modules
git-svn-id: http://google-refine.googlecode.com/svn/trunk@996 7d457c2a-affb-35e4-300a-418c747d4874
2010-06-19 02:32:12 +00:00

465 lines
14 KiB
Python

#! /usr/bin/env python
"""Mimification and unmimification of mail messages.
Decode quoted-printable parts of a mail message or encode using
quoted-printable.
Usage:
mimify(input, output)
unmimify(input, output, decode_base64 = 0)
to encode and decode respectively. Input and output may be the name
of a file or an open file object. Only a readline() method is used
on the input file, only a write() method is used on the output file.
When using file names, the input and output file names may be the
same.
Interactive usage:
mimify.py -e [infile [outfile]]
mimify.py -d [infile [outfile]]
to encode and decode respectively. Infile defaults to standard
input and outfile to standard output.
"""
# Configure
MAXLEN = 200 # if lines longer than this, encode as quoted-printable
CHARSET = 'ISO-8859-1' # default charset for non-US-ASCII mail
QUOTE = '> ' # string replies are quoted with
# End configure
import re
__all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]
qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
he = re.compile('^-*\n')
mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
repl = re.compile('^subject:\\s+re: ', re.I)
class File:
"""A simple fake file object that knows about limited read-ahead and
boundaries. The only supported method is readline()."""
def __init__(self, file, boundary):
self.file = file
self.boundary = boundary
self.peek = None
def readline(self):
if self.peek is not None:
return ''
line = self.file.readline()
if not line:
return line
if self.boundary:
if line == self.boundary + '\n':
self.peek = line
return ''
if line == self.boundary + '--\n':
self.peek = line
return ''
return line
class HeaderFile:
def __init__(self, file):
self.file = file
self.peek = None
def readline(self):
if self.peek is not None:
line = self.peek
self.peek = None
else:
line = self.file.readline()
if not line:
return line
if he.match(line):
return line
while 1:
self.peek = self.file.readline()
if len(self.peek) == 0 or \
(self.peek[0] != ' ' and self.peek[0] != '\t'):
return line
line = line + self.peek
self.peek = None
def mime_decode(line):
"""Decode a single line of quoted-printable text to 8bit."""
newline = ''
pos = 0
while 1:
res = mime_code.search(line, pos)
if res is None:
break
newline = newline + line[pos:res.start(0)] + \
chr(int(res.group(1), 16))
pos = res.end(0)
return newline + line[pos:]
def mime_decode_header(line):
"""Decode a header line to 8bit."""
newline = ''
pos = 0
while 1:
res = mime_head.search(line, pos)
if res is None:
break
match = res.group(1)
# convert underscores to spaces (before =XX conversion!)
match = ' '.join(match.split('_'))
newline = newline + line[pos:res.start(0)] + mime_decode(match)
pos = res.end(0)
return newline + line[pos:]
def unmimify_part(ifile, ofile, decode_base64 = 0):
"""Convert a quoted-printable part of a MIME mail message to 8bit."""
multipart = None
quoted_printable = 0
is_base64 = 0
is_repl = 0
if ifile.boundary and ifile.boundary[:2] == QUOTE:
prefix = QUOTE
else:
prefix = ''
# read header
hfile = HeaderFile(ifile)
while 1:
line = hfile.readline()
if not line:
return
if prefix and line[:len(prefix)] == prefix:
line = line[len(prefix):]
pref = prefix
else:
pref = ''
line = mime_decode_header(line)
if qp.match(line):
quoted_printable = 1
continue # skip this header
if decode_base64 and base64_re.match(line):
is_base64 = 1
continue
ofile.write(pref + line)
if not prefix and repl.match(line):
# we're dealing with a reply message
is_repl = 1
mp_res = mp.match(line)
if mp_res:
multipart = '--' + mp_res.group(1)
if he.match(line):
break
if is_repl and (quoted_printable or multipart):
is_repl = 0
# read body
while 1:
line = ifile.readline()
if not line:
return
line = re.sub(mime_head, '\\1', line)
if prefix and line[:len(prefix)] == prefix:
line = line[len(prefix):]
pref = prefix
else:
pref = ''
## if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
## multipart = line[:-1]
while multipart:
if line == multipart + '--\n':
ofile.write(pref + line)
multipart = None
line = None
break
if line == multipart + '\n':
ofile.write(pref + line)
nifile = File(ifile, multipart)
unmimify_part(nifile, ofile, decode_base64)
line = nifile.peek
if not line:
# premature end of file
break
continue
# not a boundary between parts
break
if line and quoted_printable:
while line[-2:] == '=\n':
line = line[:-2]
newline = ifile.readline()
if newline[:len(QUOTE)] == QUOTE:
newline = newline[len(QUOTE):]
line = line + newline
line = mime_decode(line)
if line and is_base64 and not pref:
import base64
line = base64.decodestring(line)
if line:
ofile.write(pref + line)
def unmimify(infile, outfile, decode_base64 = 0):
"""Convert quoted-printable parts of a MIME mail message to 8bit."""
if type(infile) == type(''):
ifile = open(infile)
if type(outfile) == type('') and infile == outfile:
import os
d, f = os.path.split(infile)
os.rename(infile, os.path.join(d, ',' + f))
else:
ifile = infile
if type(outfile) == type(''):
ofile = open(outfile, 'w')
else:
ofile = outfile
nifile = File(ifile, None)
unmimify_part(nifile, ofile, decode_base64)
ofile.flush()
mime_char = re.compile('[=\177-\377]') # quote these chars in body
mime_header_char = re.compile('[=?\177-\377]') # quote these in header
def mime_encode(line, header):
"""Code a single line as quoted-printable.
If header is set, quote some extra characters."""
if header:
reg = mime_header_char
else:
reg = mime_char
newline = ''
pos = 0
if len(line) >= 5 and line[:5] == 'From ':
# quote 'From ' at the start of a line for stupid mailers
newline = ('=%02x' % ord('F')).upper()
pos = 1
while 1:
res = reg.search(line, pos)
if res is None:
break
newline = newline + line[pos:res.start(0)] + \
('=%02x' % ord(res.group(0))).upper()
pos = res.end(0)
line = newline + line[pos:]
newline = ''
while len(line) >= 75:
i = 73
while line[i] == '=' or line[i-1] == '=':
i = i - 1
i = i + 1
newline = newline + line[:i] + '=\n'
line = line[i:]
return newline + line
mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')
def mime_encode_header(line):
"""Code a single header line as quoted-printable."""
newline = ''
pos = 0
while 1:
res = mime_header.search(line, pos)
if res is None:
break
newline = '%s%s%s=?%s?Q?%s?=' % \
(newline, line[pos:res.start(0)], res.group(1),
CHARSET, mime_encode(res.group(2), 1))
pos = res.end(0)
return newline + line[pos:]
mv = re.compile('^mime-version:', re.I)
cte = re.compile('^content-transfer-encoding:', re.I)
iso_char = re.compile('[\177-\377]')
def mimify_part(ifile, ofile, is_mime):
"""Convert an 8bit part of a MIME mail message to quoted-printable."""
has_cte = is_qp = is_base64 = 0
multipart = None
must_quote_body = must_quote_header = has_iso_chars = 0
header = []
header_end = ''
message = []
message_end = ''
# read header
hfile = HeaderFile(ifile)
while 1:
line = hfile.readline()
if not line:
break
if not must_quote_header and iso_char.search(line):
must_quote_header = 1
if mv.match(line):
is_mime = 1
if cte.match(line):
has_cte = 1
if qp.match(line):
is_qp = 1
elif base64_re.match(line):
is_base64 = 1
mp_res = mp.match(line)
if mp_res:
multipart = '--' + mp_res.group(1)
if he.match(line):
header_end = line
break
header.append(line)
# read body
while 1:
line = ifile.readline()
if not line:
break
if multipart:
if line == multipart + '--\n':
message_end = line
break
if line == multipart + '\n':
message_end = line
break
if is_base64:
message.append(line)
continue
if is_qp:
while line[-2:] == '=\n':
line = line[:-2]
newline = ifile.readline()
if newline[:len(QUOTE)] == QUOTE:
newline = newline[len(QUOTE):]
line = line + newline
line = mime_decode(line)
message.append(line)
if not has_iso_chars:
if iso_char.search(line):
has_iso_chars = must_quote_body = 1
if not must_quote_body:
if len(line) > MAXLEN:
must_quote_body = 1
# convert and output header and body
for line in header:
if must_quote_header:
line = mime_encode_header(line)
chrset_res = chrset.match(line)
if chrset_res:
if has_iso_chars:
# change us-ascii into iso-8859-1
if chrset_res.group(2).lower() == 'us-ascii':
line = '%s%s%s' % (chrset_res.group(1),
CHARSET,
chrset_res.group(3))
else:
# change iso-8859-* into us-ascii
line = '%sus-ascii%s' % chrset_res.group(1, 3)
if has_cte and cte.match(line):
line = 'Content-Transfer-Encoding: '
if is_base64:
line = line + 'base64\n'
elif must_quote_body:
line = line + 'quoted-printable\n'
else:
line = line + '7bit\n'
ofile.write(line)
if (must_quote_header or must_quote_body) and not is_mime:
ofile.write('Mime-Version: 1.0\n')
ofile.write('Content-Type: text/plain; ')
if has_iso_chars:
ofile.write('charset="%s"\n' % CHARSET)
else:
ofile.write('charset="us-ascii"\n')
if must_quote_body and not has_cte:
ofile.write('Content-Transfer-Encoding: quoted-printable\n')
ofile.write(header_end)
for line in message:
if must_quote_body:
line = mime_encode(line, 0)
ofile.write(line)
ofile.write(message_end)
line = message_end
while multipart:
if line == multipart + '--\n':
# read bit after the end of the last part
while 1:
line = ifile.readline()
if not line:
return
if must_quote_body:
line = mime_encode(line, 0)
ofile.write(line)
if line == multipart + '\n':
nifile = File(ifile, multipart)
mimify_part(nifile, ofile, 1)
line = nifile.peek
if not line:
# premature end of file
break
ofile.write(line)
continue
# unexpectedly no multipart separator--copy rest of file
while 1:
line = ifile.readline()
if not line:
return
if must_quote_body:
line = mime_encode(line, 0)
ofile.write(line)
def mimify(infile, outfile):
"""Convert 8bit parts of a MIME mail message to quoted-printable."""
if type(infile) == type(''):
ifile = open(infile)
if type(outfile) == type('') and infile == outfile:
import os
d, f = os.path.split(infile)
os.rename(infile, os.path.join(d, ',' + f))
else:
ifile = infile
if type(outfile) == type(''):
ofile = open(outfile, 'w')
else:
ofile = outfile
nifile = File(ifile, None)
mimify_part(nifile, ofile, 0)
ofile.flush()
import sys
if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
import getopt
usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'
decode_base64 = 0
opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
if len(args) not in (0, 1, 2):
print usage
sys.exit(1)
if (('-e', '') in opts) == (('-d', '') in opts) or \
((('-b', '') in opts) and (('-d', '') not in opts)):
print usage
sys.exit(1)
for o, a in opts:
if o == '-e':
encode = mimify
elif o == '-d':
encode = unmimify
elif o == '-l':
try:
MAXLEN = int(a)
except (ValueError, OverflowError):
print usage
sys.exit(1)
elif o == '-b':
decode_base64 = 1
if len(args) == 0:
encode_args = (sys.stdin, sys.stdout)
elif len(args) == 1:
encode_args = (args[0], sys.stdout)
else:
encode_args = (args[0], args[1])
if decode_base64:
encode_args = encode_args + (decode_base64,)
encode(*encode_args)