
107 lines
3.6 KiB
Raw Normal View History

2019-12-22 21:51:47 +01:00
# This file contains the two main functions used to encode and decode the
# modified UTF-7 strings described in RFC 3501. There are some variations
# specific to IMAP4rev1, so the built-in Python UTF-7 codec can't be used
# instead. The main difference is the shift character (used to switch from
# ASCII to the base64 encoding context), which is & in this modified UTF-7
# convention, since + is commonly used in mailbox names.
# Other variations and examples can be found in RFC 3501, section 5.1.3.
from __future__ import unicode_literals
import binascii
from six import binary_type, text_type, byte2int, iterbytes, unichr
def encode(s):
    """Encode a folder name using IMAP modified UTF-7 encoding.

    Input is unicode; output is bytes (Python 3) or str (Python 2). If
    non-unicode input is provided, the input is returned unchanged.
    """
    if not isinstance(s, text_type):
        return s

    # Encoded byte fragments, joined once at the end.
    res = []
    # Pending run of characters that must be emitted as one base64 section.
    b64_buffer = []

    def consume_b64_buffer(buf):
        """Flush ``buf`` into ``res`` as a ``&<modified base64>-`` section.

        Does nothing when ``buf`` is empty; otherwise ``buf`` is emptied in
        place after it has been encoded.
        """
        if buf:
            res.extend([b'&', base64_utf7_encode(buf), b'-'])
            del buf[:]

    for c in s:
        # Printable ASCII characters are emitted unmodified
        if 0x20 <= ord(c) <= 0x7e:
            # Close any pending base64 section before going back to ASCII
            consume_b64_buffer(b64_buffer)
            # Special case: & is the shift character, so it is escaped as &-
            if c == '&':
                res.append(b'&-')
            else:
                res.append(c.encode('ascii'))
        # Buffer every other character; the run is base64-encoded later, on
        # the next printable ASCII character or at the end of the string
        else:
            b64_buffer.append(c)

    # Flush the remaining buffer if the string finishes with buffered characters
    consume_b64_buffer(b64_buffer)

    return b''.join(res)
# Byte values of the shift character "&" and the shift-back character "-",
# precomputed so decode() can compare them against the items it gets from
# iterbytes().
AMPERSAND_ORD = byte2int(b'&')
DASH_ORD = byte2int(b'-')
def decode(s):
    """Decode a folder name from IMAP modified UTF-7 encoding to unicode.

    Input is bytes (Python 3) or str (Python 2); output is always
    unicode. If non-bytes/str input is provided, the input is returned
    unchanged.
    """
    if not isinstance(s, binary_type):
        return s

    # Decoded text fragments, joined once at the end.
    res = []
    # Store base64 substring that will be decoded once stepping on end shift character.
    # While a section is open, the buffer starts with the "&" that opened it,
    # so a non-empty buffer doubles as the "inside a shifted section" flag.
    b64_buffer = bytearray()
    for c in iterbytes(s):
        # Shift character without anything in buffer -> starts storing base64 substring
        if c == AMPERSAND_ORD and not b64_buffer:
            b64_buffer.append(c)
        # End shift char. -> append the decoded buffer to the result and reset it
        elif c == DASH_ORD and b64_buffer:
            # Special case &-, representing "&" escaped
            if len(b64_buffer) == 1:
                res.append('&')
            else:
                # Drop the leading "&" marker before decoding the payload
                res.append(base64_utf7_decode(b64_buffer[1:]))
            b64_buffer = bytearray()
        # Still buffering between the shift character and the shift back to ASCII
        elif b64_buffer:
            b64_buffer.append(c)
        # No buffer initialized yet, should be an ASCII printable char
        else:
            res.append(unichr(c))

    # Decode the remaining buffer if any (input ended before the closing "-")
    if b64_buffer:
        res.append(base64_utf7_decode(b64_buffer[1:]))

    return ''.join(res)
def base64_utf7_encode(buffer):
    """Return the modified base64 encoding of the characters in ``buffer``.

    The items of ``buffer`` are joined into one string, encoded as UTF-16BE
    and base64-encoded. The trailing newline and ``=`` padding are stripped
    and every ``/`` is replaced by ``,``.
    """
    utf16_bytes = ''.join(buffer).encode('utf-16be')
    b64 = binascii.b2a_base64(utf16_bytes)
    # b2a_base64 appends a newline; the modified alphabet also drops padding
    unpadded = b64.rstrip(b'\n=')
    return unpadded.replace(b'/', b',')
def base64_utf7_decode(s):
    """Decode one modified base64 section (without its ``&``/``-`` delimiters).

    ``,`` is mapped back to ``/`` and the result is wrapped in ``+`` and ``-``
    so that the built-in ``utf-7`` codec can decode it.
    """
    standard_b64 = s.replace(b',', b'/')
    return (b'+' + standard_b64 + b'-').decode('utf-7')