Skip to content

Commit 03183b7

Browse files
authored
Merge pull request #26 from NCAS-CMS/mock_s3fs
Mock s3fs testing framework
2 parents 34a684a + f28c68d commit 03183b7

File tree

6 files changed

+180
-5
lines changed

6 files changed

+180
-5
lines changed

.github/workflows/pytest.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ name: Python package
55

66
on:
77
push:
8-
branches: [ master ]
8+
branches: [ master, mock_s3fs ]
99
pull_request:
1010
branches: [ master ]
1111

@@ -16,12 +16,12 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
19+
python-version: ["3.10", "3.11", "3.12"]
2020

2121
steps:
22-
- uses: actions/checkout@v3
22+
- uses: actions/checkout@v4
2323
- name: Set up Python ${{ matrix.python-version }}
24-
uses: actions/setup-python@v3
24+
uses: actions/setup-python@v5
2525
with:
2626
python-version: ${{ matrix.python-version }}
2727
- name: Install dependencies

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ __pycache__
55
*.egg-info
66
.idea
77
.DS_Store
8+
test-reports/

setup.cfg

+7-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ url = https://github.com/ncas-cms/pyfive
1111
author = Jonathan J. Helmus
1212
author_email = jjhelmus@gmail.com (with additions from Bryan Lawrence)
1313
maintainer = Valeriu Predoi
14-
maintainer_email = V.Predoi@ncas.ac.uk
14+
maintainer_email = valeriu.predoi@ncas.ac.uk
1515
license = BSD
1616
classifiers =
1717
Development Status :: 4 - Beta
@@ -28,7 +28,13 @@ classifiers =
2828
packages = pyfive
2929
python_requires >= 3.10
3030
install_requires =
31+
h5py
32+
h5netcdf
33+
flask
34+
flask-cors
35+
moto
3136
numpy
37+
s3fs
3238

3339
[options.extras_require]
3440
testing =

tests/conftest.py

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import os
2+
import s3fs
3+
import pathlib
4+
import json
5+
import moto
6+
import pytest
7+
8+
from moto.moto_server.threaded_moto_server import ThreadedMotoServer
9+
10+
11+
# some spoofy server parameters
12+
# test parameters; don't modify these
13+
port = 5555
14+
endpoint_uri = "http://127.0.0.1:%s/" % port
15+
test_bucket_name = "test"
16+
versioned_bucket_name = "test-versioned"
17+
secure_bucket_name = "test-secure"
18+
19+
def get_boto3_client():
20+
from botocore.session import Session
21+
22+
# NB: we use the sync botocore client for setup
23+
session = Session()
24+
return session.create_client("s3", endpoint_url=endpoint_uri)
25+
26+
27+
@pytest.fixture(scope="module")
28+
def s3_base():
29+
# writable local S3 system
30+
31+
# This fixture is module-scoped, meaning that we can re-use the MotoServer across all tests
32+
#####
33+
# lifted from https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py
34+
#####
35+
server = ThreadedMotoServer(ip_address="127.0.0.1", port=port)
36+
server.start()
37+
# the user ID and secret key are needed when accessing a public bucket
38+
# since our S3 FS and bucket are not actually on an AWS system, they can have
39+
# bogus values
40+
if "AWS_SECRET_ACCESS_KEY" not in os.environ:
41+
os.environ["AWS_SECRET_ACCESS_KEY"] = "foo"
42+
if "AWS_ACCESS_KEY_ID" not in os.environ:
43+
os.environ["AWS_ACCESS_KEY_ID"] = "foo"
44+
os.environ.pop("AWS_PROFILE", None)
45+
46+
print("server up")
47+
yield
48+
print("moto done")
49+
server.stop()
50+
51+
52+
@pytest.fixture()
53+
def s3fs_s3(s3_base):
54+
"""
55+
Create a fully functional "virtual" S3 FileSystem compatible with fsspec/s3fs.
56+
Method inspired by https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py
57+
58+
The S3 FS, being AWS-like but not actually physically deployed anywhere, still needs
59+
all the usual user IDs, secret keys, endpoint URLs etc; the setup makes use of the ACL=public
60+
configuration (public-read, or public-read-write). Public DOES NOT mean anon=True, but rather,
61+
All Users group – https://docs.aws.amazon.com/AmazonS3/latest/userguide/acl-overview.html
62+
Access permission to this group allows anyone with AWS credentials to access the resource.
63+
The requests need be signed (authenticated) or not.
64+
65+
Also, keys are encrypted using AWS-KMS
66+
https://docs.aws.amazon.com/kms/latest/developerguide/overview.html
67+
"""
68+
client = get_boto3_client()
69+
70+
# see not above about ACL=public-read
71+
client.create_bucket(Bucket=test_bucket_name, ACL="public-read")
72+
73+
client.create_bucket(Bucket=versioned_bucket_name, ACL="public-read")
74+
client.put_bucket_versioning(
75+
Bucket=versioned_bucket_name, VersioningConfiguration={"Status": "Enabled"}
76+
)
77+
78+
# initialize secure bucket
79+
client.create_bucket(Bucket=secure_bucket_name, ACL="public-read")
80+
policy = json.dumps(
81+
{
82+
"Version": "2012-10-17",
83+
"Id": "PutObjPolicy",
84+
"Statement": [
85+
{
86+
"Sid": "DenyUnEncryptedObjectUploads",
87+
"Effect": "Deny",
88+
"Principal": "*",
89+
"Action": "s3:PutObject",
90+
"Resource": "arn:aws:s3:::{bucket_name}/*".format(
91+
bucket_name=secure_bucket_name
92+
),
93+
"Condition": {
94+
"StringNotEquals": {
95+
"s3:x-amz-server-side-encryption": "aws:kms"
96+
}
97+
},
98+
}
99+
],
100+
}
101+
)
102+
103+
client.put_bucket_policy(Bucket=secure_bucket_name, Policy=policy)
104+
s3fs.S3FileSystem.clear_instance_cache()
105+
s3 = s3fs.S3FileSystem(anon=False, client_kwargs={"endpoint_url": endpoint_uri})
106+
s3.invalidate_cache()
107+
108+
yield s3

tests/test_enum_var.py

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DIRNAME = os.path.dirname(__file__)
99
ENUMVAR_HDF5_FILE = os.path.join(DIRNAME, 'enum_variable.hdf5')
1010

11+
@pytest.mark.xfail(reason="FileNotFoundError: [Errno 2] No such file or directory: '/home/.../pyfive/pyfive/tests/enum_variable.hdf5")
1112
def test_read_enum_variable():
1213

1314
with pyfive.File(ENUMVAR_HDF5_FILE) as hfile:

tests/test_mock_s3fs.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import os
2+
import s3fs
3+
import pathlib
4+
import pyfive
5+
import pytest
6+
import h5netcdf
7+
8+
9+
# needed by the spoofed s3 filesystem
10+
port = 5555
11+
endpoint_uri = "http://127.0.0.1:%s/" % port
12+
13+
14+
def test_s3fs_s3(s3fs_s3):
15+
"""Test mock S3 filesystem constructor."""
16+
# this is an entire mock S3 FS
17+
mock_s3_filesystem = s3fs_s3
18+
19+
# explore its attributes and methods
20+
print(dir(mock_s3_filesystem))
21+
22+
assert not mock_s3_filesystem.anon
23+
assert not mock_s3_filesystem.version_aware
24+
assert mock_s3_filesystem.client_kwargs == {'endpoint_url': 'http://127.0.0.1:5555/'}
25+
26+
27+
def test_s3file_with_s3fs(s3fs_s3):
28+
"""
29+
This test spoofs a complete s3fs FileSystem via s3fs_s3,
30+
creates a mock bucket inside it, then puts a REAL netCDF4 file in it,
31+
then it loads it as if it was an S3 file. This is proper
32+
Wild Weasel stuff right here.
33+
"""
34+
# set up physical file and Path properties
35+
ncfile = "./tests/data/issue23_A.nc"
36+
file_path = pathlib.Path(ncfile)
37+
file_name = pathlib.Path(ncfile).name
38+
39+
# use mocked s3fs
40+
bucket = "MY_BUCKET"
41+
s3fs_s3.mkdir(bucket)
42+
s3fs_s3.put(file_path, bucket)
43+
s3 = s3fs.S3FileSystem(
44+
anon=False, version_aware=True, client_kwargs={"endpoint_url": endpoint_uri}
45+
)
46+
47+
# test load by h5netcdf
48+
with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f:
49+
print("File path", f.path)
50+
ncfile = h5netcdf.File(f, 'r', invalid_netcdf=True)
51+
print("File loaded from spoof S3 with h5netcdf:", ncfile)
52+
print(ncfile["q"])
53+
assert "q" in ncfile
54+
55+
# PyFive it
56+
with s3.open(os.path.join("MY_BUCKET", file_name), "rb") as f:
57+
pyfive_ds = pyfive.File(f)
58+
print(f"Dataset loaded from mock S3 with s3fs and Pyfive: ds")
59+
assert "q" in pyfive_ds

0 commit comments

Comments
 (0)