RandomSec/lib/jython-2.5.1/test/test_unicode_jy.py

# -*- coding: utf-8 -*-
"""Misc unicode tests

Made for Jython.
"""
import re
import sys
import unittest
from StringIO import StringIO
from test import test_support

class UnicodeTestCase(unittest.TestCase):

    def test_simplejson_plane_bug(self):
        # a bug exposed by simplejson: unicode __add__ was always
        # forcing the basic plane
        chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
        orig = u'z\U0001d120x'
        quoted1 = u'"z\U0001d120x"'
        quoted2 = '"' + orig + '"'
        # chunker re gives different results depending on the plane
        self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
        self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))

    def test_parse_unicode(self):
        foo = u'ą\n'
        self.assertEqual(len(foo), 2, repr(foo))
        self.assertEqual(repr(foo), "u'\\u0105\\n'")
        self.assertEqual(ord(foo[0]), 261)
        self.assertEqual(ord(foo[1]), 10)

        bar = foo.encode('utf-8')
        self.assertEqual(len(bar), 3)
        self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
        self.assertEqual(ord(bar[0]), 196)
        self.assertEqual(ord(bar[1]), 133)
        self.assertEqual(ord(bar[2]), 10)

    def test_parse_raw_unicode(self):
        foo = ur'ą\n'
        self.assertEqual(len(foo), 3, repr(foo))
        self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
        self.assertEqual(ord(foo[0]), 261)
        self.assertEqual(ord(foo[1]), 92)
        self.assertEqual(ord(foo[2]), 110)

        bar = foo.encode('utf-8')
        self.assertEqual(len(bar), 4)
        self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
        self.assertEqual(ord(bar[0]), 196)
        self.assertEqual(ord(bar[1]), 133)
        self.assertEqual(ord(bar[2]), 92)
        self.assertEqual(ord(bar[3]), 110)

        for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
            self.assertEqual(len(baz), 13, repr(baz))
            self.assertEqual(repr(baz), "u'Hello World !'")
            self.assertEqual(ord(baz[5]), 32)

        quux = ur'\U00100000'
        self.assertEqual(repr(quux), "u'\\U00100000'")
        if sys.maxunicode == 0xffff:
            self.assertEqual(len(quux), 2)
            self.assertEqual(ord(quux[0]), 56256)
            self.assertEqual(ord(quux[1]), 56320)
        else:
            self.assertEqual(len(quux), 1)
            self.assertEqual(ord(quux), 1048576)

    def test_raw_unicode_escape(self):
        foo = u'\U00100000'
        self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
        self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
                         foo)
        for bar in '\\u', '\\u000', '\\U00000':
            self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')

    def test_encode_decimal(self):
        self.assertEqual(int(u'\u0039\u0032'), 92)
        self.assertEqual(int(u'\u0660'), 0)
        self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
        self.assertEqual(long(u'\u0663'), 3)
        self.assertEqual(float(u'\u0663.\u0661'), 3.1)
        self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)

    def test_unstateful_end_of_data(self):
        # http://bugs.jython.org/issue1368
        for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
            self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)

    def test_formatchar(self):
        self.assertEqual('%c' % 255, '\xff')
        self.assertRaises(OverflowError, '%c'.__mod__, 256)

        result = u'%c' % 256
        self.assert_(isinstance(result, unicode))
        self.assertEqual(result, u'\u0100')
        if sys.maxunicode == 0xffff:
            self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
        else:
            self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
        self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)

    def test_repr(self):
        self.assert_(isinstance('%r' % u'foo', str))

    def test_concat(self):
        self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
        self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')

    def test_join(self):
        self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
        self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])

    def test_file_encoding(self):
        '''Ensure file writing doesn't attempt to encode things by default and reading doesn't
        decode things by default.  This was jython's behavior prior to 2.2.1'''
        EURO_SIGN = u"\u20ac"
        try:
            EURO_SIGN.encode()
        except UnicodeEncodeError:
            # This default encoding can't handle the encoding the Euro sign.  Skip the test
            return

        f = open(test_support.TESTFN, "w")
        self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
                "Shouldn't be able to write out a Euro sign without first encoding")
        f.close()

        f = open(test_support.TESTFN, "w")
        f.write(EURO_SIGN.encode('utf-8'))
        f.close()

        f = open(test_support.TESTFN, "r")
        encoded_euro = f.read()
        f.close()
        os.remove(test_support.TESTFN)
        self.assertEquals('\xe2\x82\xac', encoded_euro)
        self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))


class UnicodeFormatTestCase(unittest.TestCase):

    def test_unicode_mapping(self):
        assertTrue = self.assertTrue
        class EnsureUnicode(dict):
            def __missing__(self, key):
                assertTrue(isinstance(key, unicode))
                return key
        u'%(foo)s' % EnsureUnicode()

    def test_non_ascii_unicode_mod_str(self):
        # Regression test for a problem on the formatting logic: when no unicode
        # args were found, Jython stored the resulting buffer on a PyString,
        # decoding it later to make a PyUnicode. That crashed when the left side
        # of % was a unicode containing non-ascii chars
        self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")


class UnicodeStdIOTestCase(unittest.TestCase):

    def setUp(self):
        self.stdout = sys.stdout

    def tearDown(self):
        sys.stdout = self.stdout

    def test_intercepted_stdout(self):
        msg = u'Circle is 360\u00B0'
        sys.stdout = StringIO()
        print msg,
        self.assertEqual(sys.stdout.getvalue(), msg)


def test_main():
    """Run every test case class in this module through test_support."""
    cases = (
        UnicodeTestCase,
        UnicodeFormatTestCase,
        UnicodeStdIOTestCase,
    )
    test_support.run_unittest(*cases)


# Run the full suite when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
Added Python code that is part of the Jython distribution in lib/jython-2.5.1; added python.path VM arg to startup script; fixed infinite loop in unwrap() when displaying sequences of sequences. git-svn-id: http://google-refine.googlecode.com/svn/trunk@509 7d457c2a-affb-35e4-300a-418c747d4874 2010-04-20 20:50:24 +02:00			`# -*- coding: utf-8 -*-`
			`"""Misc unicode tests`

			`Made for Jython.`
			`"""`
			`import re`
			`import sys`
			`import unittest`
			`from StringIO import StringIO`
			`from test import test_support`

			`class UnicodeTestCase(unittest.TestCase):`

			`def test_simplejson_plane_bug(self):`
			`# a bug exposed by simplejson: unicode __add__ was always`
			`# forcing the basic plane`
			`chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE \| re.MULTILINE \| re.DOTALL)`
			`orig = u'z\U0001d120x'`
			`quoted1 = u'"z\U0001d120x"'`
			`quoted2 = '"' + orig + '"'`
			`# chunker re gives different results depending on the plane`
			`self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))`
			`self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))`

			`def test_parse_unicode(self):`
			`foo = u'ą\n'`
			`self.assertEqual(len(foo), 2, repr(foo))`
			`self.assertEqual(repr(foo), "u'\\u0105\\n'")`
			`self.assertEqual(ord(foo[0]), 261)`
			`self.assertEqual(ord(foo[1]), 10)`

			`bar = foo.encode('utf-8')`
			`self.assertEqual(len(bar), 3)`
			`self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")`
			`self.assertEqual(ord(bar[0]), 196)`
			`self.assertEqual(ord(bar[1]), 133)`
			`self.assertEqual(ord(bar[2]), 10)`

			`def test_parse_raw_unicode(self):`
			`foo = ur'ą\n'`
			`self.assertEqual(len(foo), 3, repr(foo))`
			`self.assertEqual(repr(foo), "u'\\u0105\\\\n'")`
			`self.assertEqual(ord(foo[0]), 261)`
			`self.assertEqual(ord(foo[1]), 92)`
			`self.assertEqual(ord(foo[2]), 110)`

			`bar = foo.encode('utf-8')`
			`self.assertEqual(len(bar), 4)`
			`self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")`
			`self.assertEqual(ord(bar[0]), 196)`
			`self.assertEqual(ord(bar[1]), 133)`
			`self.assertEqual(ord(bar[2]), 92)`
			`self.assertEqual(ord(bar[3]), 110)`

			`for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':`
			`self.assertEqual(len(baz), 13, repr(baz))`
			`self.assertEqual(repr(baz), "u'Hello World !'")`
			`self.assertEqual(ord(baz[5]), 32)`

			`quux = ur'\U00100000'`
			`self.assertEqual(repr(quux), "u'\\U00100000'")`
			`if sys.maxunicode == 0xffff:`
			`self.assertEqual(len(quux), 2)`
			`self.assertEqual(ord(quux[0]), 56256)`
			`self.assertEqual(ord(quux[1]), 56320)`
			`else:`
			`self.assertEqual(len(quux), 1)`
			`self.assertEqual(ord(quux), 1048576)`

			`def test_raw_unicode_escape(self):`
			`foo = u'\U00100000'`
			`self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')`
			`self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),`
			`foo)`
			`for bar in '\\u', '\\u000', '\\U00000':`
			`self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')`

			`def test_encode_decimal(self):`
			`self.assertEqual(int(u'\u0039\u0032'), 92)`
			`self.assertEqual(int(u'\u0660'), 0)`
			`self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)`
			`self.assertEqual(long(u'\u0663'), 3)`
			`self.assertEqual(float(u'\u0663.\u0661'), 3.1)`
			`self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)`

			`def test_unstateful_end_of_data(self):`
			`# http://bugs.jython.org/issue1368`
			`for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':`
			`self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)`

			`def test_formatchar(self):`
			`self.assertEqual('%c' % 255, '\xff')`
			`self.assertRaises(OverflowError, '%c'.__mod__, 256)`

			`result = u'%c' % 256`
			`self.assert_(isinstance(result, unicode))`
			`self.assertEqual(result, u'\u0100')`
			`if sys.maxunicode == 0xffff:`
			`self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')`
			`else:`
			`self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')`
			`self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)`

			`def test_repr(self):`
			`self.assert_(isinstance('%r' % u'foo', str))`

			`def test_concat(self):`
			`self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')`
			`self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')`

			`def test_join(self):`
			`self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])`
			`self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])`

			`def test_file_encoding(self):`
			`'''Ensure file writing doesn't attempt to encode things by default and reading doesn't`
			`decode things by default. This was jython's behavior prior to 2.2.1'''`
			`EURO_SIGN = u"\u20ac"`
			`try:`
			`EURO_SIGN.encode()`
			`except UnicodeEncodeError:`
			`# This default encoding can't handle the encoding the Euro sign. Skip the test`
			`return`

			`f = open(test_support.TESTFN, "w")`
			`self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,`
			`"Shouldn't be able to write out a Euro sign without first encoding")`
			`f.close()`

			`f = open(test_support.TESTFN, "w")`
			`f.write(EURO_SIGN.encode('utf-8'))`
			`f.close()`

			`f = open(test_support.TESTFN, "r")`
			`encoded_euro = f.read()`
			`f.close()`
			`os.remove(test_support.TESTFN)`
			`self.assertEquals('\xe2\x82\xac', encoded_euro)`
			`self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))`


			`class UnicodeFormatTestCase(unittest.TestCase):`

			`def test_unicode_mapping(self):`
			`assertTrue = self.assertTrue`
			`class EnsureUnicode(dict):`
			`def __missing__(self, key):`
			`assertTrue(isinstance(key, unicode))`
			`return key`
			`u'%(foo)s' % EnsureUnicode()`

			`def test_non_ascii_unicode_mod_str(self):`
			`# Regression test for a problem on the formatting logic: when no unicode`
			`# args were found, Jython stored the resulting buffer on a PyString,`
			`# decoding it later to make a PyUnicode. That crashed when the left side`
			`# of % was a unicode containing non-ascii chars`
			`self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")`


			`class UnicodeStdIOTestCase(unittest.TestCase):`

			`def setUp(self):`
			`self.stdout = sys.stdout`

			`def tearDown(self):`
			`sys.stdout = self.stdout`

			`def test_intercepted_stdout(self):`
			`msg = u'Circle is 360\u00B0'`
			`sys.stdout = StringIO()`
			`print msg,`
			`self.assertEqual(sys.stdout.getvalue(), msg)`


			`def test_main():`
			`test_support.run_unittest(UnicodeTestCase,`
			`UnicodeFormatTestCase,`
			`UnicodeStdIOTestCase)`


			`if __name__ == "__main__":`
			`test_main()`