import bz2 import gzip import lzma import os import pickle import tarfile import tempfile import zipfile from contextlib import contextmanager from io import BytesIO import pytest import fsspec # The blueprint to create synthesized archive files from. archive_data = {"a": b"", "b": b"hello", "deeply/nested/path": b"stuff"} @contextmanager def tempzip(data=None): """ Provide test cases with temporary synthesized Zip archives. """ data = data or {} f = tempfile.mkstemp(suffix=".zip")[1] with zipfile.ZipFile(f, mode="w") as z: for k, v in data.items(): z.writestr(k, v) try: yield f finally: try: os.remove(f) except OSError: pass @contextmanager def temparchive(data=None): """ Provide test cases with temporary synthesized 7-Zip archives. """ data = data or {} libarchive = pytest.importorskip("libarchive") f = tempfile.mkstemp(suffix=".7z")[1] with libarchive.file_writer(f, "7zip") as archive: for k, v in data.items(): archive.add_file_from_memory(entry_path=k, entry_size=len(v), entry_data=v) try: yield f finally: try: os.remove(f) except OSError: pass @contextmanager def temptar(data=None, mode="w", suffix=".tar"): """ Provide test cases with temporary synthesized .tar archives. """ data = data or {} fn = tempfile.mkstemp(suffix=suffix)[1] with tarfile.TarFile.open(fn, mode=mode) as t: touched = {} for name, data in data.items(): # Create directory hierarchy. # https://bugs.python.org/issue22208#msg225558 if "/" in name and name not in touched: parts = os.path.dirname(name).split("/") for index in range(1, len(parts) + 1): info = tarfile.TarInfo("/".join(parts[:index])) info.type = tarfile.DIRTYPE t.addfile(info) touched[name] = True # Add file content. info = tarfile.TarInfo(name=name) info.size = len(data) t.addfile(info, BytesIO(data)) try: yield fn finally: try: os.remove(fn) except OSError: pass @contextmanager def temptargz(data=None, mode="w", suffix=".tar.gz"): """ Provide test cases with temporary synthesized .tar.gz archives. """ with temptar(data=data, mode=mode) as tarname: fn = tempfile.mkstemp(suffix=suffix)[1] with open(tarname, "rb") as tar: cf = gzip.GzipFile(filename=fn, mode=mode) cf.write(tar.read()) cf.close() try: yield fn finally: try: os.remove(fn) except OSError: pass @contextmanager def temptarbz2(data=None, mode="w", suffix=".tar.bz2"): """ Provide test cases with temporary synthesized .tar.bz2 archives. """ with temptar(data=data, mode=mode) as tarname: fn = tempfile.mkstemp(suffix=suffix)[1] with open(tarname, "rb") as tar: cf = bz2.BZ2File(filename=fn, mode=mode) cf.write(tar.read()) cf.close() try: yield fn finally: try: os.remove(fn) except OSError: pass @contextmanager def temptarxz(data=None, mode="w", suffix=".tar.xz"): """ Provide test cases with temporary synthesized .tar.xz archives. """ with temptar(data=data, mode=mode) as tarname: fn = tempfile.mkstemp(suffix=suffix)[1] with open(tarname, "rb") as tar: cf = lzma.open(filename=fn, mode=mode, format=lzma.FORMAT_XZ) cf.write(tar.read()) cf.close() try: yield fn finally: try: os.remove(fn) except OSError: pass class ArchiveTestScenario: """ Describe a test scenario for any type of archive. """ def __init__(self, protocol=None, provider=None, variant=None): # The filesystem protocol identifier. Any of "zip", "tar" or "libarchive". self.protocol = protocol # A contextmanager function to provide temporary synthesized archives. self.provider = provider # The filesystem protocol variant identifier. Any of "gz", "bz2" or "xz". self.variant = variant def pytest_generate_tests(metafunc): """ Generate test scenario parametrization arguments with appropriate labels (idlist). On the one hand, this yields an appropriate output like:: fsspec/implementations/tests/test_archive.py::TestArchive::test_empty[zip] PASSED # noqa On the other hand, it will support perfect test discovery, like:: pytest fsspec -vvv -k "zip or tar or libarchive" https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios """ idlist = [] argnames = ["scenario"] argvalues = [] for scenario in metafunc.cls.scenarios: scenario: ArchiveTestScenario = scenario label = scenario.protocol if scenario.variant: label += "-" + scenario.variant idlist.append(label) argvalues.append([scenario]) metafunc.parametrize(argnames, argvalues, ids=idlist, scope="class") # Define test scenarios. scenario_zip = ArchiveTestScenario(protocol="zip", provider=tempzip) scenario_tar = ArchiveTestScenario(protocol="tar", provider=temptar) scenario_targz = ArchiveTestScenario(protocol="tar", provider=temptargz, variant="gz") scenario_tarbz2 = ArchiveTestScenario( protocol="tar", provider=temptarbz2, variant="bz2" ) scenario_tarxz = ArchiveTestScenario(protocol="tar", provider=temptarxz, variant="xz") scenario_libarchive = ArchiveTestScenario(protocol="libarchive", provider=temparchive) class TestAnyArchive: """ Validate that all filesystem adapter implementations for archive files will adhere to the same specification. """ scenarios = [ scenario_zip, scenario_tar, scenario_targz, scenario_tarbz2, scenario_tarxz, scenario_libarchive, ] def test_repr(self, scenario: ArchiveTestScenario): with scenario.provider() as archive: fs = fsspec.filesystem(scenario.protocol, fo=archive) assert repr(fs).startswith("