# PCQRSCANER/venv/Lib/site-packages/tika/tests/tests_unpack.py  (2019-12-22 21:51:47 +01:00)
# coding=utf8
import unittest
from tempfile import NamedTemporaryFile
from tika import unpack
class CreateTest(unittest.TestCase):
    """Test different encodings.

    Each test writes a known string to a named temporary file, passes the
    file's path to ``unpack.from_file`` and checks that the stripped
    ``"content"`` entry of the result equals the string that was written.
    """

    # Sample containing non-ASCII characters; written to disk as UTF-8 bytes.
    text_utf8 = u"Hello, world!! 😎 👽"
    # Plain ASCII sample; written through a text-mode file handle.
    text_ascii = u"Hello, world!!"

    def test_utf8(self):
        """Content of a UTF-8 encoded file matches the original text."""
        # No ``dir`` argument: the platform's default temporary directory is
        # used rather than a hard-coded '/tmp', which does not exist on every
        # system.
        # NOTE(review): the file is still open when its path is handed to
        # unpack.from_file; re-opening a NamedTemporaryFile by name while it
        # is open is platform-dependent -- confirm this works on Windows.
        with NamedTemporaryFile("w+b", prefix='tika-python', suffix='.txt') as f:
            f.write(self.text_utf8.encode("utf8"))
            # Make sure the bytes are on disk before the path is consumed.
            f.flush()
            f.seek(0)
            parsed = unpack.from_file(f.name)
            self.assertEqual(parsed["content"].strip(), self.text_utf8)

    def test_ascii(self):
        """Content of an ASCII text file matches the original text."""
        # Same portability consideration as in test_utf8: rely on the default
        # temporary directory instead of '/tmp'.
        with NamedTemporaryFile("w+t", prefix='tika-python', suffix='.txt') as f:
            f.write(self.text_ascii)
            # Make sure the text is on disk before the path is consumed.
            f.flush()
            f.seek(0)
            parsed = unpack.from_file(f.name)
            self.assertEqual(parsed["content"].strip(), self.text_ascii)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()