import glob
import json
import os
import pickle
import subprocess
import sys
from collections import defaultdict
from pathlib import Path

import numpy as np
import pytest

import fsspec
from fsspec.implementations.ftp import FTPFileSystem
from fsspec.implementations.http import HTTPFileSystem
from fsspec.implementations.local import LocalFileSystem
from fsspec.spec import AbstractBufferedFile, AbstractFileSystem

PATHS_FOR_GLOB_TESTS = (
    {"name": "test0.json", "type": "file", "size": 100},
    {"name": "test0.yaml", "type": "file", "size": 100},
    {"name": "test0", "type": "directory", "size": 0},
    {"name": "test0/test0.json", "type": "file", "size": 100},
    {"name": "test0/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test1", "type": "directory", "size": 0},
    {"name": "test0/test1/test0.json", "type": "file", "size": 100},
    {"name": "test0/test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test1/test2", "type": "directory", "size": 0},
    {"name": "test0/test1/test2/test0.json", "type": "file", "size": 100},
    {"name": "test0/test1/test2/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2", "type": "directory", "size": 0},
    {"name": "test0/test2/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2/test1", "type": "directory", "size": 0},
    {"name": "test0/test2/test1/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test3", "type": "directory", "size": 0},
    {"name": "test0/test2/test1/test3/test0.json", "type": "file", "size": 100},
    {"name": "test0/test2/test1/test3/test0.yaml", "type": "file", "size": 100},
    {"name": "test1.json", "type": "file", "size": 100},
    {"name": "test1.yaml", "type": "file", "size": 100},
    {"name": "test1", "type": "directory", "size": 0},
    {"name": "test1/test0.json", "type": "file", "size": 100},
    {"name": "test1/test0.yaml", "type": "file", "size": 100},
    {"name": "test1/test0", "type": "directory", "size": 0},
    {"name": "test1/test0/test0.json", "type": "file", "size": 100},
    {"name": "test1/test0/test0.yaml", "type": "file", "size": 100},
    {"name": "special_chars", "type": "directory", "size": 0},
    {"name": "special_chars/f\\oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f.oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f+oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f(oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f)oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f|oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f^oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f$oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f{oo.txt", "type": "file", "size": 100},
    {"name": "special_chars/f}oo.txt", "type": "file", "size": 100},
)

GLOB_POSIX_TESTS = {
    "argnames": ("path", "expected"),
    "argvalues": [
        ("nonexistent", []),
        ("test0.json", ["test0.json"]),
        ("test0", ["test0"]),
        ("test0/", ["test0"]),
        ("test1/test0.yaml", ["test1/test0.yaml"]),
        ("test0/test[1-2]", ["test0/test1", "test0/test2"]),
        ("test0/test[1-2]/", ["test0/test1", "test0/test2"]),
        (
            "test0/test[1-2]/*",
            [
                "test0/test1/test0.json",
                "test0/test1/test0.yaml",
                "test0/test1/test2",
                "test0/test2/test0.json",
                "test0/test2/test0.yaml",
                "test0/test2/test1",
            ],
        ),
        (
            "test0/test[1-2]/*.[j]*",
            ["test0/test1/test0.json", "test0/test2/test0.json"],
        ),
        ("special_chars/f\\oo.*", ["special_chars/f\\oo.txt"]),
("special_chars/f.oo.*", ["special_chars/f.oo.txt"]), ("special_chars/f+oo.*", ["special_chars/f+oo.txt"]), ("special_chars/f(oo.*", ["special_chars/f(oo.txt"]), ("special_chars/f)oo.*", ["special_chars/f)oo.txt"]), ("special_chars/f|oo.*", ["special_chars/f|oo.txt"]), ("special_chars/f^oo.*", ["special_chars/f^oo.txt"]), ("special_chars/f$oo.*", ["special_chars/f$oo.txt"]), ("special_chars/f{oo.*", ["special_chars/f{oo.txt"]), ("special_chars/f}oo.*", ["special_chars/f}oo.txt"]), ( "*", [ "special_chars", "test0.json", "test0.yaml", "test0", "test1.json", "test1.yaml", "test1", ], ), ("*.yaml", ["test0.yaml", "test1.yaml"]), ( "**", [ "special_chars", "special_chars/f$oo.txt", "special_chars/f(oo.txt", "special_chars/f)oo.txt", "special_chars/f+oo.txt", "special_chars/f.oo.txt", "special_chars/f\\oo.txt", "special_chars/f^oo.txt", "special_chars/f{oo.txt", "special_chars/f|oo.txt", "special_chars/f}oo.txt", "test0.json", "test0.yaml", "test0", "test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test1/test2/test0.json", "test0/test1/test2/test0.yaml", "test0/test2", "test0/test2/test0.json", "test0/test2/test0.yaml", "test0/test2/test1", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test0/test2/test1/test3/test0.json", "test0/test2/test1/test3/test0.yaml", "test1.json", "test1.yaml", "test1", "test1/test0.json", "test1/test0.yaml", "test1/test0", "test1/test0/test0.json", "test1/test0/test0.yaml", ], ), ("*/", ["special_chars", "test0", "test1"]), ( "**/", [ "special_chars", "test0", "test0/test1", "test0/test1/test2", "test0/test2", "test0/test2/test1", "test0/test2/test1/test3", "test1", "test1/test0", ], ), ("*/*.yaml", ["test0/test0.yaml", "test1/test0.yaml"]), ( "**/*.yaml", [ "test0.yaml", "test0/test0.yaml", "test0/test1/test0.yaml", "test0/test1/test2/test0.yaml", "test0/test2/test0.yaml", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3/test0.yaml", "test1.yaml", "test1/test0.yaml", "test1/test0/test0.yaml", ], ), ( "*/test1/*", ["test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2"], ), ("*/test1/*.yaml", ["test0/test1/test0.yaml"]), ( "**/test1/*", [ "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test1/test0.json", "test1/test0.yaml", "test1/test0", ], ), ( "**/test1/*.yaml", [ "test0/test1/test0.yaml", "test0/test2/test1/test0.yaml", "test1/test0.yaml", ], ), ("*/test1/*/", ["test0/test1/test2"]), ( "**/test1/*/", ["test0/test1/test2", "test0/test2/test1/test3", "test1/test0"], ), ( "*/test1/**", [ "test0/test1", "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test1/test2/test0.json", "test0/test1/test2/test0.yaml", ], ), ( "**/test1/**", [ "test0/test1", "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test1/test2/test0.json", "test0/test1/test2/test0.yaml", "test0/test2/test1", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test0/test2/test1/test3/test0.json", "test0/test2/test1/test3/test0.yaml", "test1", "test1/test0.json", "test1/test0.yaml", "test1/test0", "test1/test0/test0.json", "test1/test0/test0.yaml", ], ), ("*/test1/**/", ["test0/test1", "test0/test1/test2"]), ( "**/test1/**/", [ "test0/test1", "test0/test1/test2", "test0/test2/test1", "test0/test2/test1/test3", "test1", "test1/test0", ], ), 
( "test0/*", ["test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test2"], ), ("test0/*.yaml", ["test0/test0.yaml"]), ( "test0/**", [ "test0", "test0/test0.json", "test0/test0.yaml", "test0/test1", "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test1/test2/test0.json", "test0/test1/test2/test0.yaml", "test0/test2", "test0/test2/test0.json", "test0/test2/test0.yaml", "test0/test2/test1", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test0/test2/test1/test3/test0.json", "test0/test2/test1/test3/test0.yaml", ], ), ("test0/*/", ["test0/test1", "test0/test2"]), ( "test0/**/", [ "test0", "test0/test1", "test0/test1/test2", "test0/test2", "test0/test2/test1", "test0/test2/test1/test3", ], ), ("test0/*/*.yaml", ["test0/test1/test0.yaml", "test0/test2/test0.yaml"]), ( "test0/**/*.yaml", [ "test0/test0.yaml", "test0/test1/test0.yaml", "test0/test1/test2/test0.yaml", "test0/test2/test0.yaml", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3/test0.yaml", ], ), ( "test0/*/test1/*", [ "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", ], ), ("test0/*/test1/*.yaml", ["test0/test2/test1/test0.yaml"]), ( "test0/**/test1/*", [ "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", ], ), ( "test0/**/test1/*.yaml", ["test0/test1/test0.yaml", "test0/test2/test1/test0.yaml"], ), ("test0/*/test1/*/", ["test0/test2/test1/test3"]), ("test0/**/test1/*/", ["test0/test1/test2", "test0/test2/test1/test3"]), ( "test0/*/test1/**", [ "test0/test2/test1", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test0/test2/test1/test3/test0.json", "test0/test2/test1/test3/test0.yaml", ], ), ( "test0/**/test1/**", [ "test0/test1", "test0/test1/test0.json", "test0/test1/test0.yaml", "test0/test1/test2", "test0/test1/test2/test0.json", "test0/test1/test2/test0.yaml", "test0/test2/test1", "test0/test2/test1/test0.json", "test0/test2/test1/test0.yaml", "test0/test2/test1/test3", "test0/test2/test1/test3/test0.json", "test0/test2/test1/test3/test0.yaml", ], ), ("test0/*/test1/**/", ["test0/test2/test1", "test0/test2/test1/test3"]), ( "test0/**/test1/**/", [ "test0/test1", "test0/test1/test2", "test0/test2/test1", "test0/test2/test1/test3", ], ), ], } class DummyTestFS(AbstractFileSystem): protocol = "mock" _file_class = AbstractBufferedFile _fs_contents = ( {"name": "top_level", "type": "directory"}, {"name": "top_level/second_level", "type": "directory"}, {"name": "top_level/second_level/date=2019-10-01", "type": "directory"}, { "name": "top_level/second_level/date=2019-10-01/a.parquet", "type": "file", "size": 100, }, { "name": "top_level/second_level/date=2019-10-01/b.parquet", "type": "file", "size": 100, }, {"name": "top_level/second_level/date=2019-10-02", "type": "directory"}, { "name": "top_level/second_level/date=2019-10-02/a.parquet", "type": "file", "size": 100, }, {"name": "top_level/second_level/date=2019-10-04", "type": "directory"}, { "name": "top_level/second_level/date=2019-10-04/a.parquet", "type": "file", "size": 100, }, {"name": "misc", "type": "directory"}, {"name": "misc/foo.txt", "type": "file", "size": 100}, ) def __init__(self, fs_content=None, **kwargs): if fs_content is not None: self._fs_contents = fs_content super().__init__(**kwargs) def __getitem__(self, name): for item in self._fs_contents: if item["name"] == name: 
                return item
        raise IndexError(f"{name} not found!")

    def ls(self, path, detail=True, refresh=True, **kwargs):
        if kwargs.pop("strip_proto", True):
            path = self._strip_protocol(path)

        files = not refresh and self._ls_from_cache(path)
        if not files:
            files = [
                file
                for file in self._fs_contents
                if path == self._parent(file["name"])
            ]
            files.sort(key=lambda file: file["name"])
            self.dircache[path.rstrip("/")] = files

        if detail:
            return files
        return [file["name"] for file in files]

    @classmethod
    def get_test_paths(cls, start_with=""):
        """Helper to return directory and file paths with no details"""
        all = [
            file["name"]
            for file in cls._fs_contents
            if file["name"].startswith(start_with)
        ]
        return all

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        **kwargs,
    ):
        return self._file_class(
            self,
            path,
            mode,
            block_size,
            autocommit,
            cache_options=cache_options,
            **kwargs,
        )


@pytest.mark.parametrize(
    ["test_paths", "recursive", "maxdepth", "expected"],
    [
        (
            (
                "top_level/second_level",
                "top_level/sec*",
                "top_level/sec*vel",
                "top_level/*",
            ),
            True,
            None,
            [
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-01/a.parquet",
                "top_level/second_level/date=2019-10-01/b.parquet",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-02/a.parquet",
                "top_level/second_level/date=2019-10-04",
                "top_level/second_level/date=2019-10-04/a.parquet",
            ],
        ),
        (
            (
                "top_level/second_level",
                "top_level/sec*",
                "top_level/sec*vel",
                "top_level/*",
            ),
            False,
            None,
            [
                "top_level/second_level",
            ],
        ),
        (
            ("top_level/second_level",),
            True,
            1,
            [
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-04",
            ],
        ),
        (
            ("top_level/second_level",),
            True,
            2,
            [
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-01/a.parquet",
                "top_level/second_level/date=2019-10-01/b.parquet",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-02/a.parquet",
                "top_level/second_level/date=2019-10-04",
                "top_level/second_level/date=2019-10-04/a.parquet",
            ],
        ),
        (
            ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"),
            True,
            1,
            ["top_level/second_level"],
        ),
        (
            ("top_level/*", "top_level/sec*", "top_level/sec*vel", "top_level/*"),
            True,
            2,
            [
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-04",
            ],
        ),
        (
            ("top_level/**",),
            False,
            None,
            [
                "top_level",
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-01/a.parquet",
                "top_level/second_level/date=2019-10-01/b.parquet",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-02/a.parquet",
                "top_level/second_level/date=2019-10-04",
                "top_level/second_level/date=2019-10-04/a.parquet",
            ],
        ),
        (
            ("top_level/**",),
            True,
            None,
            [
                "top_level",
                "top_level/second_level",
                "top_level/second_level/date=2019-10-01",
                "top_level/second_level/date=2019-10-01/a.parquet",
                "top_level/second_level/date=2019-10-01/b.parquet",
                "top_level/second_level/date=2019-10-02",
                "top_level/second_level/date=2019-10-02/a.parquet",
                "top_level/second_level/date=2019-10-04",
                "top_level/second_level/date=2019-10-04/a.parquet",
            ],
        ),
        (("top_level/**",), True, 1, ["top_level", "top_level/second_level"]),
        (
            ("top_level/**",),
            True,
            2,
            [
                "top_level",
                "top_level/second_level",
"top_level/second_level/date=2019-10-01", "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-01/b.parquet", "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", "top_level/second_level/date=2019-10-04", "top_level/second_level/date=2019-10-04/a.parquet", ], ), ( ("top_level/**/a.*",), False, None, [ "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-02/a.parquet", "top_level/second_level/date=2019-10-04/a.parquet", ], ), ( ("top_level/**/a.*",), True, None, [ "top_level/second_level/date=2019-10-01/a.parquet", "top_level/second_level/date=2019-10-02/a.parquet", "top_level/second_level/date=2019-10-04/a.parquet", ], ), ( ("top_level/**/second_level/date=2019-10-02",), False, 2, [ "top_level/second_level/date=2019-10-02", ], ), ( ("top_level/**/second_level/date=2019-10-02",), True, 2, [ "top_level/second_level/date=2019-10-02", "top_level/second_level/date=2019-10-02/a.parquet", ], ), [("misc/foo.txt", "misc/*.txt"), False, None, ["misc/foo.txt"]], [("misc/foo.txt", "misc/*.txt"), True, None, ["misc/foo.txt"]], ( ("",), False, None, [DummyTestFS.root_marker], ), ( ("",), True, None, DummyTestFS.get_test_paths() + [DummyTestFS.root_marker], ), [ (Path("misc/foo.txt"),), False, None, [f"misc{os.sep}foo.txt"], ], ], ) def test_expand_path(test_paths, recursive, maxdepth, expected): """Test a number of paths and then their combination which should all yield the same set of expanded paths""" test_fs = DummyTestFS() # test single query for test_path in test_paths: paths = test_fs.expand_path(test_path, recursive=recursive, maxdepth=maxdepth) assert sorted(paths) == sorted(expected) # test with all queries paths = test_fs.expand_path( list(test_paths), recursive=recursive, maxdepth=maxdepth ) assert sorted(paths) == sorted(expected) def test_expand_paths_with_wrong_args(): test_fs = DummyTestFS() with pytest.raises(ValueError): test_fs.expand_path("top_level", recursive=True, maxdepth=0) with pytest.raises(ValueError): test_fs.expand_path("top_level", maxdepth=0) with pytest.raises(FileNotFoundError): test_fs.expand_path("top_level/**/second_level/date=2019-10-02", maxdepth=1) with pytest.raises(FileNotFoundError): test_fs.expand_path("nonexistent/*") @pytest.mark.xfail def test_find(): """Test .find() method on debian server (ftp, https) with constant folder""" filesystem, host, test_path = ( FTPFileSystem, "ftp.fau.de", "ftp://ftp.fau.de/debian-cd/current/amd64/log/success", ) test_fs = filesystem(host) filenames_ftp = test_fs.find(test_path) assert filenames_ftp filesystem, host, test_path = ( HTTPFileSystem, "https://ftp.fau.de", "https://ftp.fau.de/debian-cd/current/amd64/log/success", ) test_fs = filesystem() filenames_http = test_fs.find(test_path) roots = [f.rsplit("/", 1)[-1] for f in filenames_http] assert all(f.rsplit("/", 1)[-1] in roots for f in filenames_ftp) def test_find_details(): test_fs = DummyTestFS() filenames = test_fs.find("/") details = test_fs.find("/", detail=True) for filename in filenames: assert details[filename] == test_fs.info(filename) def test_find_file(): test_fs = DummyTestFS() filename = "misc/foo.txt" assert test_fs.find(filename) == [filename] assert test_fs.find(filename, detail=True) == {filename: {}} def test_cache(): fs = DummyTestFS() fs2 = DummyTestFS() assert fs is fs2 assert DummyTestFS.current() is fs assert len(fs._cache) == 1 del fs2 assert len(fs._cache) == 1 del fs # keeps and internal reference, doesn't get collected assert 
    assert len(DummyTestFS._cache) == 1

    DummyTestFS.clear_instance_cache()
    assert len(DummyTestFS._cache) == 0


def test_current():
    fs = DummyTestFS()
    fs2 = DummyTestFS(arg=1)
    assert fs is not fs2
    assert DummyTestFS.current() is fs2
    DummyTestFS()
    assert DummyTestFS.current() is fs


def test_alias():
    with pytest.warns(FutureWarning, match="add_aliases"):
        DummyTestFS(add_aliases=True)


def test_add_docs_warns():
    with pytest.warns(FutureWarning, match="add_docs"):
        AbstractFileSystem(add_docs=True)


def test_cache_options():
    fs = DummyTestFS()
    f = AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes")
    assert f.cache.trim

    # TODO: dummy buffered file
    f = AbstractBufferedFile(
        fs, "misc/foo.txt", cache_type="bytes", cache_options={"trim": False}
    )
    assert f.cache.trim is False

    f = fs.open("misc/foo.txt", cache_type="bytes", cache_options={"trim": False})
    assert f.cache.trim is False


def test_trim_kwarg_warns():
    fs = DummyTestFS()
    with pytest.warns(FutureWarning, match="cache_options"):
        AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes", trim=False)


def tests_file_open_error(monkeypatch):
    class InitiateError(ValueError):
        ...

    class UploadError(ValueError):
        ...

    class DummyBufferedFile(AbstractBufferedFile):
        can_initiate = False

        def _initiate_upload(self):
            if not self.can_initiate:
                raise InitiateError

        def _upload_chunk(self, final=False):
            raise UploadError

    monkeypatch.setattr(DummyTestFS, "_file_class", DummyBufferedFile)
    fs = DummyTestFS()

    with pytest.raises(InitiateError):
        with fs.open("misc/foo.txt", "wb") as stream:
            stream.write(b"hello" * stream.blocksize * 2)

    with pytest.raises(UploadError):
        with fs.open("misc/foo.txt", "wb") as stream:
            stream.can_initiate = True
            stream.write(b"hello" * stream.blocksize * 2)


def test_eq():
    fs = DummyTestFS()
    result = fs == 1
    assert result is False

    f = AbstractBufferedFile(fs, "misc/foo.txt", cache_type="bytes")
    result = f == 1
    assert result is False


def test_pickle_multiple():
    a = DummyTestFS(1)
    b = DummyTestFS(2, bar=1)

    x = pickle.dumps(a)
    y = pickle.dumps(b)

    del a, b
    DummyTestFS.clear_instance_cache()

    result = pickle.loads(x)
    assert result.storage_args == (1,)
    assert result.storage_options == {}

    result = pickle.loads(y)
    assert result.storage_args == (2,)
    assert result.storage_options == {"bar": 1}


def test_json():
    a = DummyTestFS(1)
    b = DummyTestFS(2, bar=1)

    outa = a.to_json()
    outb = b.to_json()

    assert json.loads(outb)  # is valid JSON
    assert a != b
    assert "bar" in outb

    assert DummyTestFS.from_json(outa) is a
    assert DummyTestFS.from_json(outb) is b


def test_ls_from_cache():
    fs = DummyTestFS()
    uncached_results = fs.ls("top_level/second_level/", refresh=True)

    assert fs.ls("top_level/second_level/", refresh=False) == uncached_results

    # _strip_protocol removes everything by default, but for the sake of
    # testing the _ls_from_cache interface directly, we need to run one more
    # time without that call to actually verify that the stripping in the
    # client function works.
    assert (
        fs.ls("top_level/second_level/", refresh=False, strip_proto=False)
        == uncached_results
    )


@pytest.mark.parametrize(
    "dt",
    [
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint8,
        np.uint16,
        np.uint32,
        np.uint64,
        np.float32,
        np.float64,
    ],
)
def test_readinto_with_numpy(tmpdir, dt):
    store_path = str(tmpdir / "test_arr.npy")
    arr = np.arange(10, dtype=dt)
    arr.tofile(store_path)

    arr2 = np.empty_like(arr)
    with fsspec.open(store_path, "rb") as f:
        f.readinto(arr2)

    assert np.array_equal(arr, arr2)


@pytest.mark.parametrize(
    "dt",
    [
        np.int8,
        np.int16,
        np.int32,
        np.int64,
        np.uint8,
        np.uint16,
        np.uint32,
        np.uint64,
        np.float32,
        np.float64,
    ],
)
def test_readinto_with_multibyte(ftp_writable, tmpdir, dt):
    host, port, user, pw = ftp_writable
    ftp = FTPFileSystem(host=host, port=port, username=user, password=pw)

    with ftp.open("/out", "wb") as fp:
        arr = np.arange(10, dtype=dt)
        fp.write(arr.tobytes())

    with ftp.open("/out", "rb") as fp:
        arr2 = np.empty_like(arr)
        fp.readinto(arr2)

    assert np.array_equal(arr, arr2)


class DummyOpenFS(DummyTestFS):
    blocksize = 10

    def _open(self, path, mode="rb", **kwargs):
        stream = open(path, mode)
        stream.size = os.stat(path).st_size
        return stream


class BasicCallback(fsspec.Callback):
    def __init__(self, **kwargs):
        self.events = []
        super().__init__(**kwargs)

    def set_size(self, size):
        self.events.append(("set_size", size))

    def relative_update(self, inc=1):
        self.events.append(("relative_update", inc))


def imitate_transfer(size, chunk, *, file=True):
    events = [("set_size", size)]
    events.extend(("relative_update", size // chunk) for _ in range(chunk))
    if file:
        # The reason there is a relative_update(0) at the end is that we don't
        # have an early exit in the get_file/put_file implementations, so the
        # transfer needs to go through the callback once more to get caught by
        # the while loop's condition before it stops.
events.append(("relative_update", 0)) return events def get_files(tmpdir, amount=10): src, dest, base = [], [], [] for index in range(amount): src_path = tmpdir / f"src_{index}.txt" src_path.write_text("x" * 50, "utf-8") src.append(str(src_path)) dest.append(str(tmpdir / f"dst_{index}.txt")) base.append(str(tmpdir / f"file_{index}.txt")) return src, dest, base def test_dummy_callbacks_file(tmpdir): fs = DummyOpenFS() callback = BasicCallback() file = tmpdir / "file.txt" source = tmpdir / "tmp.txt" destination = tmpdir / "tmp2.txt" size = 100 source.write_text("x" * 100, "utf-8") fs.put_file(source, file, callback=callback) # -1 here since put_file no longer has final zero-size put assert callback.events == imitate_transfer(size, 10)[:-1] callback.events.clear() fs.get_file(file, destination, callback=callback) assert callback.events == imitate_transfer(size, 10) callback.events.clear() assert destination.read_text("utf-8") == "x" * 100 def test_dummy_callbacks_files(tmpdir): fs = DummyOpenFS() callback = BasicCallback() src, dest, base = get_files(tmpdir) fs.put(src, base, callback=callback) assert callback.events == imitate_transfer(10, 10, file=False) callback.events.clear() fs.get(base, dest, callback=callback) assert callback.events == imitate_transfer(10, 10, file=False) class BranchableCallback(BasicCallback): def __init__(self, source, dest=None, events=None, **kwargs): super().__init__(**kwargs) if dest: self.key = source, dest else: self.key = (source,) self.events = events or defaultdict(list) def branch(self, path_1, path_2, kwargs): from fsspec.implementations.local import make_path_posix path_1 = make_path_posix(path_1) path_2 = make_path_posix(path_2) kwargs["callback"] = BranchableCallback(path_1, path_2, events=self.events) def set_size(self, size): self.events[self.key].append(("set_size", size)) def relative_update(self, inc=1): self.events[self.key].append(("relative_update", inc)) def test_dummy_callbacks_files_branched(tmpdir): fs = DummyOpenFS() src, dest, base = get_files(tmpdir) callback = BranchableCallback("top-level") def check_events(lpaths, rpaths): from fsspec.implementations.local import make_path_posix base_keys = zip(make_path_posix(lpaths), make_path_posix(rpaths)) assert set(callback.events.keys()) == {("top-level",), *base_keys} assert callback.events["top-level",] == imitate_transfer(10, 10, file=False) for key in base_keys: assert callback.events[key] == imitate_transfer(50, 5) fs.put(src, base, callback=callback) check_events(src, base) callback.events.clear() fs.get(base, dest, callback=callback) check_events(base, dest) callback.events.clear() def _clean_paths(paths, prefix=""): """ Helper to cleanup paths results by doing the following: - remove the prefix provided from all paths - remove the trailing slashes from all paths - remove duplicates paths - sort all paths """ paths_list = paths if isinstance(paths, dict): paths_list = list(paths) paths_list = [p.replace(prefix, "").strip("/") for p in sorted(set(paths_list))] if isinstance(paths, dict): return {p: paths[p] for p in paths_list} return paths_list @pytest.fixture(scope="function") def glob_fs(): return DummyTestFS(fs_content=PATHS_FOR_GLOB_TESTS) @pytest.fixture(scope="function") def glob_files_folder(tmp_path): local_fs = LocalFileSystem(auto_mkdir=True) local_fake_dir = str(tmp_path) for path_info in PATHS_FOR_GLOB_TESTS: if path_info["type"] == "file": local_fs.touch(path=f"{str(tmp_path)}/{path_info['name']}") return local_fake_dir @pytest.mark.skipif( sys.platform.startswith("win"), 
reason="no need to run python glob posix tests on windows", ) @pytest.mark.parametrize( GLOB_POSIX_TESTS["argnames"], GLOB_POSIX_TESTS["argvalues"], ) def test_posix_tests_python_glob(path, expected, glob_files_folder): """ Tests against python glob to check if our posix tests are accurate. """ os.chdir(glob_files_folder) python_output = glob.glob(pathname=path, recursive=True) assert _clean_paths(python_output, glob_files_folder) == _clean_paths(expected) @pytest.mark.skipif( sys.platform.startswith("win"), reason="no need to run bash stat posix tests on windows", ) @pytest.mark.parametrize( GLOB_POSIX_TESTS["argnames"], GLOB_POSIX_TESTS["argvalues"], ) def test_posix_tests_bash_stat(path, expected, glob_files_folder): """ Tests against bash stat to check if our posix tests are accurate. """ try: subprocess.check_output(["bash", "-c", "shopt -s globstar"]) except FileNotFoundError: pytest.skip("bash is not available") except subprocess.CalledProcessError: pytest.skip("globstar option is not available") bash_path = ( path.replace("\\", "\\\\") .replace("$", "\\$") .replace("(", "\\(") .replace(")", "\\)") .replace("|", "\\|") ) bash_output = subprocess.run( [ "bash", "-c", f"cd {glob_files_folder} && shopt -s globstar && stat -c %N {bash_path}", ], capture_output=True, check=False, ) # Remove the last element always empty bash_output = bash_output.stdout.decode("utf-8").replace("'", "").split("\n")[:-1] assert _clean_paths(bash_output, glob_files_folder) == _clean_paths(expected) @pytest.mark.parametrize( GLOB_POSIX_TESTS["argnames"], GLOB_POSIX_TESTS["argvalues"], ) def test_glob_posix_rules(path, expected, glob_fs): output = glob_fs.glob(path=f"mock://{path}") assert _clean_paths(output) == _clean_paths(expected) detailed_output = glob_fs.glob(path=f"mock://{path}", detail=True) for name, info in _clean_paths(detailed_output).items(): assert info == glob_fs[name]