183 lines
6.5 KiB
Python
183 lines
6.5 KiB
Python
|
# -*- coding: utf-8 -*-
"""Miscellaneous Unicode tests.

Made for Jython.
"""
|
||
|
import re
|
||
|
import sys
|
||
|
import unittest
|
||
|
from StringIO import StringIO
|
||
|
from test import test_support
|
||
|
|
||
|
class UnicodeTestCase(unittest.TestCase):
|
||
|
|
||
|
def test_simplejson_plane_bug(self):
|
||
|
# a bug exposed by simplejson: unicode __add__ was always
|
||
|
# forcing the basic plane
|
||
|
chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
|
||
|
orig = u'z\U0001d120x'
|
||
|
quoted1 = u'"z\U0001d120x"'
|
||
|
quoted2 = '"' + orig + '"'
|
||
|
# chunker re gives different results depending on the plane
|
||
|
self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
|
||
|
self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
|
||
|
|
||
|
def test_parse_unicode(self):
|
||
|
foo = u'ą\n'
|
||
|
self.assertEqual(len(foo), 2, repr(foo))
|
||
|
self.assertEqual(repr(foo), "u'\\u0105\\n'")
|
||
|
self.assertEqual(ord(foo[0]), 261)
|
||
|
self.assertEqual(ord(foo[1]), 10)
|
||
|
|
||
|
bar = foo.encode('utf-8')
|
||
|
self.assertEqual(len(bar), 3)
|
||
|
self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
|
||
|
self.assertEqual(ord(bar[0]), 196)
|
||
|
self.assertEqual(ord(bar[1]), 133)
|
||
|
self.assertEqual(ord(bar[2]), 10)
|
||
|
|
||
|
def test_parse_raw_unicode(self):
|
||
|
foo = ur'ą\n'
|
||
|
self.assertEqual(len(foo), 3, repr(foo))
|
||
|
self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
|
||
|
self.assertEqual(ord(foo[0]), 261)
|
||
|
self.assertEqual(ord(foo[1]), 92)
|
||
|
self.assertEqual(ord(foo[2]), 110)
|
||
|
|
||
|
bar = foo.encode('utf-8')
|
||
|
self.assertEqual(len(bar), 4)
|
||
|
self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
|
||
|
self.assertEqual(ord(bar[0]), 196)
|
||
|
self.assertEqual(ord(bar[1]), 133)
|
||
|
self.assertEqual(ord(bar[2]), 92)
|
||
|
self.assertEqual(ord(bar[3]), 110)
|
||
|
|
||
|
for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
|
||
|
self.assertEqual(len(baz), 13, repr(baz))
|
||
|
self.assertEqual(repr(baz), "u'Hello World !'")
|
||
|
self.assertEqual(ord(baz[5]), 32)
|
||
|
|
||
|
quux = ur'\U00100000'
|
||
|
self.assertEqual(repr(quux), "u'\\U00100000'")
|
||
|
if sys.maxunicode == 0xffff:
|
||
|
self.assertEqual(len(quux), 2)
|
||
|
self.assertEqual(ord(quux[0]), 56256)
|
||
|
self.assertEqual(ord(quux[1]), 56320)
|
||
|
else:
|
||
|
self.assertEqual(len(quux), 1)
|
||
|
self.assertEqual(ord(quux), 1048576)
|
||
|
|
||
|
def test_raw_unicode_escape(self):
|
||
|
foo = u'\U00100000'
|
||
|
self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
|
||
|
self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
|
||
|
foo)
|
||
|
for bar in '\\u', '\\u000', '\\U00000':
|
||
|
self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')
|
||
|
|
||
|
def test_encode_decimal(self):
|
||
|
self.assertEqual(int(u'\u0039\u0032'), 92)
|
||
|
self.assertEqual(int(u'\u0660'), 0)
|
||
|
self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
|
||
|
self.assertEqual(long(u'\u0663'), 3)
|
||
|
self.assertEqual(float(u'\u0663.\u0661'), 3.1)
|
||
|
self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)
|
||
|
|
||
|
def test_unstateful_end_of_data(self):
|
||
|
# http://bugs.jython.org/issue1368
|
||
|
for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
|
||
|
self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)
|
||
|
|
||
|
def test_formatchar(self):
|
||
|
self.assertEqual('%c' % 255, '\xff')
|
||
|
self.assertRaises(OverflowError, '%c'.__mod__, 256)
|
||
|
|
||
|
result = u'%c' % 256
|
||
|
self.assert_(isinstance(result, unicode))
|
||
|
self.assertEqual(result, u'\u0100')
|
||
|
if sys.maxunicode == 0xffff:
|
||
|
self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
|
||
|
else:
|
||
|
self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
|
||
|
self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)
|
||
|
|
||
|
def test_repr(self):
|
||
|
self.assert_(isinstance('%r' % u'foo', str))
|
||
|
|
||
|
def test_concat(self):
|
||
|
self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
|
||
|
self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')
|
||
|
|
||
|
def test_join(self):
|
||
|
self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
|
||
|
self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
|
||
|
|
||
|
def test_file_encoding(self):
|
||
|
'''Ensure file writing doesn't attempt to encode things by default and reading doesn't
|
||
|
decode things by default. This was jython's behavior prior to 2.2.1'''
|
||
|
EURO_SIGN = u"\u20ac"
|
||
|
try:
|
||
|
EURO_SIGN.encode()
|
||
|
except UnicodeEncodeError:
|
||
|
# This default encoding can't handle the encoding the Euro sign. Skip the test
|
||
|
return
|
||
|
|
||
|
f = open(test_support.TESTFN, "w")
|
||
|
self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
|
||
|
"Shouldn't be able to write out a Euro sign without first encoding")
|
||
|
f.close()
|
||
|
|
||
|
f = open(test_support.TESTFN, "w")
|
||
|
f.write(EURO_SIGN.encode('utf-8'))
|
||
|
f.close()
|
||
|
|
||
|
f = open(test_support.TESTFN, "r")
|
||
|
encoded_euro = f.read()
|
||
|
f.close()
|
||
|
os.remove(test_support.TESTFN)
|
||
|
self.assertEquals('\xe2\x82\xac', encoded_euro)
|
||
|
self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
|
||
|
|
||
|
|
||
|
class UnicodeFormatTestCase(unittest.TestCase):
|
||
|
|
||
|
def test_unicode_mapping(self):
|
||
|
assertTrue = self.assertTrue
|
||
|
class EnsureUnicode(dict):
|
||
|
def __missing__(self, key):
|
||
|
assertTrue(isinstance(key, unicode))
|
||
|
return key
|
||
|
u'%(foo)s' % EnsureUnicode()
|
||
|
|
||
|
def test_non_ascii_unicode_mod_str(self):
|
||
|
# Regression test for a problem on the formatting logic: when no unicode
|
||
|
# args were found, Jython stored the resulting buffer on a PyString,
|
||
|
# decoding it later to make a PyUnicode. That crashed when the left side
|
||
|
# of % was a unicode containing non-ascii chars
|
||
|
self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")
|
||
|
|
||
|
|
||
|
class UnicodeStdIOTestCase(unittest.TestCase):
|
||
|
|
||
|
def setUp(self):
|
||
|
self.stdout = sys.stdout
|
||
|
|
||
|
def tearDown(self):
|
||
|
sys.stdout = self.stdout
|
||
|
|
||
|
def test_intercepted_stdout(self):
|
||
|
msg = u'Circle is 360\u00B0'
|
||
|
sys.stdout = StringIO()
|
||
|
print msg,
|
||
|
self.assertEqual(sys.stdout.getvalue(), msg)
|
||
|
|
||
|
|
||
|
def test_main():
    """Run the three unicode test case classes through test_support."""
    cases = (
        UnicodeTestCase,
        UnicodeFormatTestCase,
        UnicodeStdIOTestCase,
    )
    test_support.run_unittest(*cases)
|
||
|
|
||
|
|
||
|
# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    test_main()
|