32 lines
947 B
Python
32 lines
947 B
Python
|
# coding=utf8
|
||
|
|
||
|
import unittest
|
||
|
from tempfile import NamedTemporaryFile
|
||
|
from tika import unpack
|
||
|
|
||
|
|
||
|
class CreateTest(unittest.TestCase):
    """Round-trip text files through ``unpack.from_file`` in different encodings.

    Each test writes a known string to a temporary ``.txt`` file, passes the
    file's path to ``unpack.from_file`` and expects the stripped ``"content"``
    entry of the result to equal the string that was written.
    """

    # Sample that contains characters outside the ASCII range (emoji).
    text_utf8 = u"Hello, world!! 😎 👽"
    # Pure-ASCII sample.
    text_ascii = u"Hello, world!!"

    def _unpacked_content(self, mode, payload):
        """Write *payload* to a temp file and return its unpacked content.

        *mode* is the open mode given to ``NamedTemporaryFile``; *payload*
        must match it (bytes for binary mode, str for text mode). The
        returned value is ``unpack.from_file(...)["content"]`` with
        surrounding whitespace stripped.
        """
        tmp_options = {"prefix": 'tika-python', "suffix": '.txt', "dir": '/tmp'}
        with NamedTemporaryFile(mode, **tmp_options) as handle:
            handle.write(payload)
            # Push buffered data out before the file is handed over by name.
            handle.flush()
            handle.seek(0)
            result = unpack.from_file(handle.name)
            return result["content"].strip()

    def test_utf8(self):
        # Binary mode: the text is encoded to UTF-8 bytes explicitly.
        expected = self.text_utf8
        content = self._unpacked_content("w+b", expected.encode("utf8"))
        self.assertEqual(content, expected)

    def test_ascii(self):
        # Text mode: the string is written as-is.
        expected = self.text_ascii
        content = self._unpacked_content("w+t", expected)
        self.assertEqual(content, expected)
|
||
|
|
||
|
|
||
|
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|