From 51866d7b6fa3bd2d972c31bc10382954a817eaef Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Tue, 23 Jun 2020 23:20:45 +0200 Subject: [PATCH 1/7] to_device: Handle nested lists/tuples recursively The previous implementation of `to_device` would break when a user decided to return a list of tensors in `forward`. This patch applies `to_device` recursively and adds support for lists in addition to tuples. --- skorch/tests/test_utils.py | 30 ++++++++++++++++++++++++++++++ skorch/utils.py | 4 ++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index 4673ed352..e821330a6 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -155,6 +155,10 @@ def x_pad_seq(self): length = torch.as_tensor([2, 2, 1]) return pack_padded_sequence(value, length) + @pytest.fixture + def x_list(self): + return [torch.zeros(3), torch.ones(2, 4)] + def check_device_type(self, tensor, device_input, prev_device): """assert expected device type conditioned on the input argument for `to_device`""" if None is device_input: @@ -230,6 +234,32 @@ def test_check_device_packed_padded_sequence( x_pad_seq = to_device(x_pad_seq, device=device_to) self.check_device_type(x_pad_seq.data, device_to, prev_device) + @pytest.mark.parametrize('device_from, device_to', [ + ('cpu', 'cpu'), + ('cpu', 'cuda'), + ('cuda', 'cpu'), + ('cuda', 'cuda'), + (None, None), + ]) + def test_nested_data(self, to_device, x_list, device_from, device_to): + # Sometimes data is nested because it would need to be padded so its + # easier to return a list of tensors with different shapes. + # to_device should honor this. + if 'cuda' in (device_from, device_to) and not torch.cuda.is_available(): + pytest.skip() + + prev_devices = [None for _ in range(len(x_list))] + if None in (device_from, device_to): + prev_devices = [x.device.type for x in x_list] + + x_list = to_device(x_list, device=device_from) + for xi, prev_d in zip(x_list, prev_devices): + self.check_device_type(xi, device_from, prev_d) + + x_list = to_device(x_list, device=device_to) + for xi, prev_d in zip(x_list, prev_devices): + self.check_device_type(xi, device_to, prev_d) + class TestDuplicateItems: @pytest.fixture diff --git a/skorch/utils.py b/skorch/utils.py index 024906cdc..68bd4884d 100644 --- a/skorch/utils.py +++ b/skorch/utils.py @@ -147,8 +147,8 @@ def to_device(X, device): return X # PackedSequence class inherits from a namedtuple - if isinstance(X, tuple) and (type(X) != PackedSequence): - return tuple(x.to(device) for x in X) + if isinstance(X, (tuple, list)) and (type(X) != PackedSequence): + return tuple(to_device(x, device) for x in X) return X.to(device) From d109433738c60587c483b1e095807443fe0613ae Mon Sep 17 00:00:00 2001 From: ottonemo Date: Wed, 24 Jun 2020 17:01:42 +0200 Subject: [PATCH 2/7] Update skorch/tests/test_utils.py Co-authored-by: Benjamin Bossan --- skorch/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index e821330a6..39d56b892 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -242,7 +242,7 @@ def test_check_device_packed_padded_sequence( (None, None), ]) def test_nested_data(self, to_device, x_list, device_from, device_to): - # Sometimes data is nested because it would need to be padded so its + # Sometimes data is nested because it would need to be padded so it's # easier to return a list of tensors with different shapes. # to_device should honor this. if 'cuda' in (device_from, device_to) and not torch.cuda.is_available(): From 3da5fb57fcbe18e029d3aa52fa812cfbc39ab91f Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 24 Jun 2020 18:06:04 +0200 Subject: [PATCH 3/7] Don't change list to tuple --- skorch/tests/test_utils.py | 4 ++++ skorch/utils.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index 39d56b892..4981eb761 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -253,10 +253,14 @@ def test_nested_data(self, to_device, x_list, device_from, device_to): prev_devices = [x.device.type for x in x_list] x_list = to_device(x_list, device=device_from) + assert isinstance(x_list, list) + for xi, prev_d in zip(x_list, prev_devices): self.check_device_type(xi, device_from, prev_d) x_list = to_device(x_list, device=device_to) + assert isinstance(x_list, list) + for xi, prev_d in zip(x_list, prev_devices): self.check_device_type(xi, device_to, prev_d) diff --git a/skorch/utils.py b/skorch/utils.py index 68bd4884d..3652fe73c 100644 --- a/skorch/utils.py +++ b/skorch/utils.py @@ -148,7 +148,7 @@ def to_device(X, device): # PackedSequence class inherits from a namedtuple if isinstance(X, (tuple, list)) and (type(X) != PackedSequence): - return tuple(to_device(x, device) for x in X) + return type(X)(to_device(x, device) for x in X) return X.to(device) From 7fec24f6b48e7bfcfe59e4630277e96c00d6485a Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 24 Jun 2020 18:06:36 +0200 Subject: [PATCH 4/7] Apply list conversion logic to to_numpy as well --- skorch/tests/test_utils.py | 40 ++++++++++++++++++++++++++++++++++++++ skorch/utils.py | 3 +++ 2 files changed, 43 insertions(+) diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index 4981eb761..a2e385bfb 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -135,6 +135,46 @@ def test_sparse_tensor_not_accepted_raises(self, to_tensor, device): assert exc.value.args[0] == msg +class TestToNumpy: + @pytest.fixture + def to_numpy(self): + from skorch.utils import to_numpy + return to_numpy + + @pytest.fixture + def x_tensor(self): + return torch.zeros(3, 4) + + @pytest.fixture + def x_tuple(self): + return torch.ones(3), torch.zeros(3, 4) + + @pytest.fixture + def x_list(self): + return [torch.ones(3), torch.zeros(3, 4)] + + def compare_array_to_tensor(self, x_numpy, x_tensor): + assert isinstance(x_tensor, torch.Tensor) + assert isinstance(x_numpy, np.ndarray) + assert x_numpy.shape == x_tensor.shape + for a, b in zip(x_numpy.flatten(), x_tensor.flatten()): + assert np.isclose(a, b.item()) + + def test_tensor(self, to_numpy, x_tensor): + x_numpy = to_numpy(x_tensor) + self.compare_array_to_tensor(x_numpy, x_tensor) + + def test_list(self, to_numpy, x_list): + x_numpy = to_numpy(x_list) + for entry_numpy, entry_torch in zip(x_numpy, x_list): + self.compare_array_to_tensor(entry_numpy, entry_torch) + + def test_tuple(self, to_numpy, x_tuple): + x_numpy = to_numpy(x_tuple) + for entry_numpy, entry_torch in zip(x_numpy, x_tuple): + self.compare_array_to_tensor(entry_numpy, entry_torch) + + class TestToDevice: @pytest.fixture def to_device(self): diff --git a/skorch/utils.py b/skorch/utils.py index 3652fe73c..617ef6c28 100644 --- a/skorch/utils.py +++ b/skorch/utils.py @@ -113,6 +113,9 @@ def to_numpy(X): if is_pandas_ndframe(X): return X.values + if isinstance(X, (tuple, list)): + return type(X)(to_numpy(x) for x in X) + if not is_torch_data_type(X): raise TypeError("Cannot convert this data type to a numpy array.") From 491d35ebf875464f66a4685936f88ec0faf850fe Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 1 Jul 2020 19:00:49 +0200 Subject: [PATCH 5/7] Add tests for dict unpacking --- CHANGES.md | 1 + skorch/tests/test_utils.py | 29 ++++++++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 6e8ece802..ae3109a46 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Raise `FutureWarning` when using `CyclicLR` scheduler, because the default behavior has changed from taking a step every batch to taking a step every epoch. (#626) - Set train/validation on criterion if it's a PyTorch module (#621) - Don't pass `y=None` to `NeuralNet.train_split` to enable the direct use of split functions without positional `y` in their signatures. This is useful when working with unsupervised data (#605). +- `to_numpy` is now able to unpack dicts and lists/tuples (#657, #658) ### Fixed diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index eb6f1a8b8..b06d3c873 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -153,6 +153,10 @@ def x_tuple(self): def x_list(self): return [torch.ones(3), torch.zeros(3, 4)] + @pytest.fixture + def x_dict(self): + return {'a': torch.ones(3), 'b': (torch.zeros(2), torch.zeros(3))} + def compare_array_to_tensor(self, x_numpy, x_tensor): assert isinstance(x_tensor, torch.Tensor) assert isinstance(x_numpy, np.ndarray) @@ -174,6 +178,25 @@ def test_tuple(self, to_numpy, x_tuple): for entry_numpy, entry_torch in zip(x_numpy, x_tuple): self.compare_array_to_tensor(entry_numpy, entry_torch) + def test_dict(self, to_numpy, x_dict): + x_numpy = to_numpy(x_dict) + self.compare_array_to_tensor(x_numpy['a'], x_dict['a']) + self.compare_array_to_tensor(x_numpy['b'][0], x_dict['b'][0]) + self.compare_array_to_tensor(x_numpy['b'][1], x_dict['b'][1]) + + @pytest.mark.parametrize('x_invalid', [ + 1, + [1,2,3], + (1,2,3), + {'a': 1}, + ]) + def test_invalid_inputs(self, to_numpy, x_invalid): + " Inputs that are invalid for the scope of to_numpy. " + with pytest.raises(TypeError) as e: + to_numpy(x_invalid) + expected = "Cannot convert this data type to a numpy array." + assert e.value.args[0] == expected + class TestToDevice: @pytest.fixture @@ -195,7 +218,7 @@ def x_dict(self): 'x': torch.zeros(3), 'y': torch.ones((4, 5)) } - + @pytest.fixture def x_pad_seq(self): value = torch.zeros((5, 3)).float() @@ -258,7 +281,7 @@ def test_check_device_tuple_torch_tensor( x_tup = to_device(x_tup, device=device_to) for xi, prev_d in zip(x_tup, prev_devices): self.check_device_type(xi, device_to, prev_d) - + @pytest.mark.parametrize('device_from, device_to', [ ('cpu', 'cpu'), ('cpu', 'cuda'), @@ -288,7 +311,7 @@ def test_check_device_dict_torch_tensor( assert x_dict.keys() == original_x_dict.keys() for k in x_dict: assert np.allclose(x_dict[k], original_x_dict[k]) - + @pytest.mark.parametrize('device_from, device_to', [ ('cpu', 'cpu'), ('cpu', 'cuda'), From c4fa14337f66a046da43d122f6a6b3d8acfcad91 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 1 Jul 2020 19:04:08 +0200 Subject: [PATCH 6/7] Document scope of to_numpy --- skorch/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/skorch/utils.py b/skorch/utils.py index 34d9ceb91..bf412760b 100644 --- a/skorch/utils.py +++ b/skorch/utils.py @@ -104,6 +104,10 @@ def to_tensor(X, device, accept_sparse=False): def to_numpy(X): """Generic function to convert a pytorch tensor to numpy. + This function tries to unpack the tensor(s) from supported + data structures (e.g., dicts, lists, etc.) but doesn't go + beyond. + Returns X when it already is a numpy array. """ From c7c3433ae4cdcbdaf2ed5f4bad2ae4611cde47f8 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Thu, 2 Jul 2020 16:17:34 +0200 Subject: [PATCH 7/7] Convert doc string to comment --- skorch/tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skorch/tests/test_utils.py b/skorch/tests/test_utils.py index b06d3c873..44969bee9 100644 --- a/skorch/tests/test_utils.py +++ b/skorch/tests/test_utils.py @@ -191,7 +191,7 @@ def test_dict(self, to_numpy, x_dict): {'a': 1}, ]) def test_invalid_inputs(self, to_numpy, x_invalid): - " Inputs that are invalid for the scope of to_numpy. " + # Inputs that are invalid for the scope of to_numpy. with pytest.raises(TypeError) as e: to_numpy(x_invalid) expected = "Cannot convert this data type to a numpy array."