32 lines
947 B
Python
32 lines
947 B
Python
# coding=utf8
|
|
|
|
import unittest
|
|
from tempfile import NamedTemporaryFile
|
|
from tika import unpack
|
|
|
|
|
|
class CreateTest(unittest.TestCase):
    """Test different encodings.

    Each test writes a short text to a temporary ``.txt`` file, hands the
    file's path to ``unpack.from_file`` and checks that the stripped
    ``"content"`` entry of the result equals the text that was written.
    """

    text_utf8 = u"Hello, world!! 😎 👽"
    text_ascii = u"Hello, world!!"

    def _unpacked_content(self, mode, payload):
        """Write *payload* to a temporary file and return its unpacked content.

        :param mode: mode string passed to ``NamedTemporaryFile``.
        :param payload: data to write; bytes for a binary *mode*, text for a
            text *mode*.
        :return: ``parsed["content"]`` with surrounding whitespace stripped.
        """
        # No explicit ``dir``: the platform's default temporary directory is
        # used instead of a hard-coded '/tmp', which does not exist everywhere.
        with NamedTemporaryFile(mode, prefix='tika-python', suffix='.txt') as f:
            f.write(payload)
            # from_file receives the path, not this handle, so the buffered
            # data has to be flushed before the call.
            f.flush()
            f.seek(0)
            parsed = unpack.from_file(f.name)
        return parsed["content"].strip()

    def test_utf8(self):
        """Non-ASCII text written as UTF-8 bytes comes back unchanged."""
        content = self._unpacked_content("w+b", self.text_utf8.encode("utf8"))
        self.assertEqual(content, self.text_utf8)

    def test_ascii(self):
        """Plain ASCII text written in text mode comes back unchanged."""
        content = self._unpacked_content("w+t", self.text_ascii)
        self.assertEqual(content, self.text_ascii)
|
|
|
|
|
|
# Run the test suite only when this file is executed as a script,
# not when it is imported by a test runner.
if __name__ == '__main__':
    unittest.main()
|