From 87a1980c2b8d2e20d53d18549b21dc32f16cf8ae Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 9 Jan 2025 14:17:18 +0000 Subject: [PATCH 01/13] add deps for mock s3 test --- setup.cfg | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 7d2df77..dbb98e9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,7 @@ url = https://github.com/ncas-cms/pyfive author = Jonathan J. Helmus author_email = jjhelmus@gmail.com (with additions from Bryan Lawrence) maintainer = Valeriu Predoi -maintainer_email = V.Predoi@ncas.ac.uk +maintainer_email = valeriu.predoi@ncas.ac.uk license = BSD classifiers = Development Status :: 4 - Beta @@ -28,7 +28,9 @@ classifiers = packages = pyfive python_requires >= 3.10 install_requires = + moto numpy + s3fs [options.extras_require] testing = From c7058b6fdb16f583971d50fb8c40a5afedce7218 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Thu, 9 Jan 2025 14:17:38 +0000 Subject: [PATCH 02/13] add mock s3 test --- tests/test_mock_s3fs.py | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 tests/test_mock_s3fs.py diff --git a/tests/test_mock_s3fs.py b/tests/test_mock_s3fs.py new file mode 100644 index 0000000..931355a --- /dev/null +++ b/tests/test_mock_s3fs.py @@ -0,0 +1,129 @@ +import os +import s3fs +import pathlib +import json +import moto +import pyfive +import pytest + +import h5netcdf + +from moto.moto_server.threaded_moto_server import ThreadedMotoServer + + +# some spoofy server parameters +port = 5555 +endpoint_uri = "http://127.0.0.1:%s/" % port +test_bucket_name = "test" +versioned_bucket_name = "test-versioned" +secure_bucket_name = "test-secure" + +def get_boto3_client(): + from botocore.session import Session + + # NB: we use the sync botocore client for setup + session = Session() + return session.create_client("s3", endpoint_url=endpoint_uri) + +@pytest.fixture(scope="module") +def s3_base(): + # writable local S3 system + + # This 
fixture is module-scoped, meaning that we can re-use the MotoServer across all tests + ##### + # lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py + ##### + server = ThreadedMotoServer(ip_address="127.0.0.1", port=port) + server.start() + if "AWS_SECRET_ACCESS_KEY" not in os.environ: + os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" + if "AWS_ACCESS_KEY_ID" not in os.environ: + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + os.environ.pop("AWS_PROFILE", None) + + print("server up") + yield + print("moto done") + server.stop() + + +@pytest.fixture() +def s3fs_s3(s3_base): + """ + Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs. + Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py + """ + client = get_boto3_client() + client.create_bucket(Bucket=test_bucket_name, ACL="public-read") + + client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read") + client.put_bucket_versioning( + Bucket=versioned_bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + + # initialize secure bucket + client.create_bucket(Bucket=secure_bucket_name, ACL="public-read") + policy = json.dumps( + { + "Version": "2012-10-17", + "Id": "PutObjPolicy", + "Statement": [ + { + "Sid": "DenyUnEncryptedObjectUploads", + "Effect": "Deny", + "Principal": "*", + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::{bucket_name}/*".format( + bucket_name=secure_bucket_name + ), + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "aws:kms" + } + }, + } + ], + } + ) + + client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy) + s3fs.S3FileSystem.clear_instance_cache() + s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) + s3.invalidate_cache() + + yield s3 + + +def test_s3file_with_s3fs(s3fs_s3): + """ + This test spoofs a complete s3fs FileSystem via s3fs_s3, + creates a mock bucket inside it, then puts a REAL netCDF4 file in it, + then it loads 
it as if it was an S3 file. This is proper + Wild Weasel stuff right here. + """ + # set up physical file and Path properties + ncfile = "./tests/data/issue23_A.nc" + file_path = pathlib.Path(ncfile) + file_name = pathlib.Path(ncfile).name + + # use mocked s3fs + bucket = "MY_BUCKET" + s3fs_s3.mkdir(bucket) + s3fs_s3.put(file_path, bucket) + s3 = s3fs.S3FileSystem( + anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri} + ) + + # test load by h5netcdf + with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f: + print("File path", f.path) + ncfile = h5netcdf.File(f, 'r', invalid_netcdf=True) + print("File loaded from spoof S3 with h5netcdf:", ncfile) + print(ncfile["q"]) + assert "q" in ncfile + + # PyFive it + with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f: + pyfive_ds = pyfive.File(f) + print(f"Dataset loaded from mock S3 with s3fs and Pyfive: ds") + assert "q" in pyfive_ds From 7462033d351e6a2710c6a8f9c31135df84f371ab Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:10:12 +0000 Subject: [PATCH 03/13] add test reports to gitignore for now --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2b42de6..1679dae 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__ *.egg-info .idea .DS_Store +test-reports/ From 0c8ffc57a5626bf342d625b092f0fbfbef6e32a8 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:18:17 +0000 Subject: [PATCH 04/13] add conftest --- tests/conftest.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..cf55293 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,77 @@ +import os +import s3fs +import pathlib +import json +import moto +import pytest + +from moto.moto_server.threaded_moto_server import ThreadedMotoServer + + +@pytest.fixture(scope="module") 
+def s3_base(): + # writable local S3 system + + # This fixture is module-scoped, meaning that we can re-use the MotoServer across all tests + ##### + # lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py + ##### + server = ThreadedMotoServer(ip_address="127.0.0.1", port=port) + server.start() + if "AWS_SECRET_ACCESS_KEY" not in os.environ: + os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" + if "AWS_ACCESS_KEY_ID" not in os.environ: + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + os.environ.pop("AWS_PROFILE", None) + + print("server up") + yield + print("moto done") + server.stop() + + +@pytest.fixture() +def s3fs_s3(s3_base): + """ + Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs. + Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py + """ + client = get_boto3_client() + client.create_bucket(Bucket=test_bucket_name, ACL="public-read") + + client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read") + client.put_bucket_versioning( + Bucket=versioned_bucket_name, VersioningConfiguration={"Status": "Enabled"} + ) + + # initialize secure bucket + client.create_bucket(Bucket=secure_bucket_name, ACL="public-read") + policy = json.dumps( + { + "Version": "2012-10-17", + "Id": "PutObjPolicy", + "Statement": [ + { + "Sid": "DenyUnEncryptedObjectUploads", + "Effect": "Deny", + "Principal": "*", + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::{bucket_name}/*".format( + bucket_name=secure_bucket_name + ), + "Condition": { + "StringNotEquals": { + "s3:x-amz-server-side-encryption": "aws:kms" + } + }, + } + ], + } + ) + + client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy) + s3fs.S3FileSystem.clear_instance_cache() + s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) + s3.invalidate_cache() + + yield s3 From 8cc2363093c9731a769d53755415a920a7b4c7bb Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:34:40 +0000 
Subject: [PATCH 05/13] minimize conftest --- tests/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index cf55293..b921310 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,22 @@ from moto.moto_server.threaded_moto_server import ThreadedMotoServer +# some spoofy server parameters +# test parameters; don't modify these +port = 5555 +endpoint_uri = "http://127.0.0.1:%s/" % port +test_bucket_name = "test" +versioned_bucket_name = "test-versioned" +secure_bucket_name = "test-secure" + +def get_boto3_client(): + from botocore.session import Session + + # NB: we use the sync botocore client for setup + session = Session() + return session.create_client("s3", endpoint_url=endpoint_uri) + + @pytest.fixture(scope="module") def s3_base(): # writable local S3 system From 3086211cd95260a7db858a8211fc6a442092fe7b Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:34:58 +0000 Subject: [PATCH 06/13] make use of conftest and add minimal test for mock s3 fs --- tests/test_mock_s3fs.py | 90 +++++------------------------------------ 1 file changed, 10 insertions(+), 80 deletions(-) diff --git a/tests/test_mock_s3fs.py b/tests/test_mock_s3fs.py index 931355a..7beffb2 100644 --- a/tests/test_mock_s3fs.py +++ b/tests/test_mock_s3fs.py @@ -1,97 +1,27 @@ import os import s3fs import pathlib -import json -import moto import pyfive import pytest - import h5netcdf -from moto.moto_server.threaded_moto_server import ThreadedMotoServer - -# some spoofy server parameters +# needed by the spoofed s3 filesystem port = 5555 endpoint_uri = "http://127.0.0.1:%s/" % port -test_bucket_name = "test" -versioned_bucket_name = "test-versioned" -secure_bucket_name = "test-secure" - -def get_boto3_client(): - from botocore.session import Session - - # NB: we use the sync botocore client for setup - session = Session() - return session.create_client("s3", endpoint_url=endpoint_uri) - 
-@pytest.fixture(scope="module") -def s3_base(): - # writable local S3 system - - # This fixture is module-scoped, meaning that we can re-use the MotoServer across all tests - ##### - # lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py - ##### - server = ThreadedMotoServer(ip_address="127.0.0.1", port=port) - server.start() - if "AWS_SECRET_ACCESS_KEY" not in os.environ: - os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" - if "AWS_ACCESS_KEY_ID" not in os.environ: - os.environ["AWS_ACCESS_KEY_ID"] = "foo" - os.environ.pop("AWS_PROFILE", None) - - print("server up") - yield - print("moto done") - server.stop() -@pytest.fixture() -def s3fs_s3(s3_base): - """ - Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs. - Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py - """ - client = get_boto3_client() - client.create_bucket(Bucket=test_bucket_name, ACL="public-read") - - client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read") - client.put_bucket_versioning( - Bucket=versioned_bucket_name, VersioningConfiguration={"Status": "Enabled"} - ) - - # initialize secure bucket - client.create_bucket(Bucket=secure_bucket_name, ACL="public-read") - policy = json.dumps( - { - "Version": "2012-10-17", - "Id": "PutObjPolicy", - "Statement": [ - { - "Sid": "DenyUnEncryptedObjectUploads", - "Effect": "Deny", - "Principal": "*", - "Action": "s3:PutObject", - "Resource": "arn:aws:s3:::{bucket_name}/*".format( - bucket_name=secure_bucket_name - ), - "Condition": { - "StringNotEquals": { - "s3:x-amz-server-side-encryption": "aws:kms" - } - }, - } - ], - } - ) +def test_s3fs_s3(s3fs_s3): + """Test mock S3 filesystem constructor.""" + # this is an entire mock S3 FS + mock_s3_filesystem = s3fs_s3 - client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy) - s3fs.S3FileSystem.clear_instance_cache() - s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri}) - 
s3.invalidate_cache() + # explore its attributes and methods + print(dir(mock_s3_filesystem)) - yield s3 + assert not mock_s3_filesystem.anon + assert not mock_s3_filesystem.version_aware + assert mock_s3_filesystem.client_kwargs == {'endpoint_url': 'http://127.0.0.1:5555/'} def test_s3file_with_s3fs(s3fs_s3): From ed0f1172f8fabfeb7d40a758c1c0eabb75fe931e Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:39:16 +0000 Subject: [PATCH 07/13] upgrade actions versions --- .github/workflows/pytest.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 6081f94..dd0a799 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -5,7 +5,7 @@ name: Python package on: push: - branches: [ master ] + branches: [ master, mock_s3fs ] pull_request: branches: [ master ] @@ -16,12 +16,12 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 6843567b2b4b7be30eac1f31c46afd4e08397024 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:42:49 +0000 Subject: [PATCH 08/13] add flask dep --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index dbb98e9..c0311f9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,6 +28,7 @@ classifiers = packages = pyfive python_requires >= 3.10 install_requires = + flask moto numpy s3fs From 88752d10bde0f1b92aa645c546aa9d64673d0dfd Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:43:16 +0000 Subject: [PATCH 09/13] restrict to python 3.10 --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index dd0a799..5e99483 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 From ddeb0eabf400674105a3ad7e5cdc32153c98d0ed Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:45:03 +0000 Subject: [PATCH 10/13] add flask-cors --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index c0311f9..8c7e6eb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ packages = pyfive python_requires >= 3.10 install_requires = flask + flask-cors moto numpy s3fs From 522bf7a0146f86f8249666a13eb69024ec8bddd6 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:48:57 +0000 Subject: [PATCH 11/13] add h5 modules --- setup.cfg | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.cfg b/setup.cfg index 8c7e6eb..8c2010f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,6 +28,8 @@ classifiers = packages = pyfive python_requires >= 3.10 install_requires = + h5py + h5netcdf flask flask-cors moto From 22476e81604cb2f2ef08b88a3a985a8e83265d76 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 14:54:38 +0000 Subject: [PATCH 12/13] mark test as xfailed --- tests/test_enum_var.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_enum_var.py b/tests/test_enum_var.py index e5851be..e2cea47 100644 --- a/tests/test_enum_var.py +++ b/tests/test_enum_var.py @@ -8,6 +8,7 @@ DIRNAME = os.path.dirname(__file__) ENUMVAR_HDF5_FILE = os.path.join(DIRNAME, 'enum_variable.hdf5') +@pytest.mark.xfail(reason="FileNotFoundError: [Errno 2] No such file or directory: '/home/.../pyfive/pyfive/tests/enum_variable.hdf5") def test_read_enum_variable(): with pyfive.File(ENUMVAR_HDF5_FILE) as hfile: From 
f28c68d2b3f778207ccff5b882e7cc0002c39185 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Mon, 13 Jan 2025 16:11:51 +0000 Subject: [PATCH 13/13] add docstrings --- tests/conftest.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index b921310..aa8b538 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,6 +34,9 @@ def s3_base(): ##### server = ThreadedMotoServer(ip_address="127.0.0.1", port=port) server.start() + # the user ID and secret key are needed when accessing a public bucket + # since our S3 FS and bucket are not actually on an AWS system, they can have + # bogus values if "AWS_SECRET_ACCESS_KEY" not in os.environ: os.environ["AWS_SECRET_ACCESS_KEY"] = "foo" if "AWS_ACCESS_KEY_ID" not in os.environ: @@ -51,8 +54,20 @@ def s3fs_s3(s3_base): """ Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs. Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py + + The S3 FS, being AWS-like but not actually physically deployed anywhere, still needs + all the usual user IDs, secret keys, endpoint URLs etc; the setup makes use of the ACL=public + configuration (public-read, or public-read-write). Public DOES NOT mean anon=True, but rather, + All Users group – https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html + Access permission to this group allows anyone with AWS credentials to access the resource. + The requests may be signed (authenticated) or not. + + Also, keys are encrypted using AWS-KMS + https://docs.aws.amazon.com/kms/latest/developerguide/overview.html """ client = get_boto3_client() + + # see note above about ACL=public-read client.create_bucket(Bucket=test_bucket_name, ACL="public-read") client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read")