Skip to content
This repository was archived by the owner on Jun 22, 2022. It is now read-only.

Commit 2024748

Browse files
author
Kamil A. Kaczmarek
authored
Dev s12 (#112)
* removed suffixes from Step names * removed the global _ALL_STEPS_NAMES list; upstream Step names are now checked by default, and an error is raised if two Steps share the same name * simplified error logs
1 parent 99275ea commit 2024748

File tree

3 files changed

+37
-51
lines changed

3 files changed

+37
-51
lines changed

docs/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# The short X.Y version
2727
version = '0.1'
2828
# The full version, including alpha/beta/rc tags
29-
release = '0.1.11'
29+
release = '0.1.12'
3030

3131

3232
# -- General configuration ---------------------------------------------------

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,11 @@
1313

1414
setup(name='steppy',
1515
packages=['steppy'],
16-
version='0.1.11',
16+
version='0.1.12',
1717
description='A lightweight, open-source, Python library for fast and reproducible experimentation',
1818
long_description=long_description,
1919
url='https://github.com/minerva-ml/steppy',
20-
download_url='https://github.com/minerva-ml/steppy/archive/0.1.11.tar.gz',
20+
download_url='https://github.com/minerva-ml/steppy/archive/0.1.12.tar.gz',
2121
author='Kamil A. Kaczmarek, Jakub Czakon',
2222
author_email='kamil.kaczmarek@neptune.ml, jakub.czakon@neptune.ml',
2323
keywords=['machine-learning', 'reproducibility', 'pipeline', 'data-science'],

steppy/base.py

+34-48
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
'load_persisted_output': False
1919
}
2020

21-
_ALL_STEPS_NAMES = list()
22-
2321

2422
class Step:
2523
"""Step is a building block of steppy pipelines.
@@ -180,41 +178,42 @@ def __init__(self,
180178
cache_output=False,
181179
load_persisted_output=False):
182180

183-
name = self._format_step_name(name, transformer)
181+
self.name = self._format_step_name(name, transformer)
184182

185183
if experiment_directory is not None:
186184
assert isinstance(experiment_directory, str),\
187185
'Step {} error, experiment_directory must ' \
188-
'be str, got {} instead.'.format(name, type(experiment_directory))
186+
'be str, got {} instead.'.format(self.name, type(experiment_directory))
189187
else:
190188
experiment_directory = os.path.join(os.path.expanduser("~"), '.steppy')
191189
logger.info('Using default experiment directory: {}'.format(experiment_directory))
192190

193191
if output_directory is not None:
194192
assert isinstance(output_directory, str),\
195-
'Step {}, output_directory must be str, got {} instead'.format(name, type(output_directory))
193+
'Step {}, output_directory must be str, got {} instead'.format(self.name, type(output_directory))
196194

197195
if input_data is not None:
198196
assert isinstance(input_data, list), 'Step {} error, input_data must be list, ' \
199-
'got {} instead.'.format(name, type(input_data))
197+
'got {} instead.'.format(self.name, type(input_data))
200198
if input_steps is not None:
201199
assert isinstance(input_steps, list), 'Step {} error, input_steps must be list, ' \
202-
'got {} instead.'.format(name, type(input_steps))
200+
'got {} instead.'.format(self.name, type(input_steps))
203201
if adapter is not None:
204202
assert isinstance(adapter, Adapter), 'Step {} error, adapter must be an instance ' \
205-
'of {}'.format(name, str(Adapter))
203+
'of {}'.format(self.name, str(Adapter))
206204

207205
assert isinstance(cache_output, bool), 'Step {} error, cache_output must be bool, ' \
208-
'got {} instead.'.format(name, type(cache_output))
206+
'got {} instead.'.format(self.name, type(cache_output))
209207
assert isinstance(persist_output, bool), 'Step {} error, persist_output must be bool, ' \
210-
'got {} instead.'.format(name, type(persist_output))
208+
'got {} instead.'.format(self.name, type(persist_output))
211209
assert isinstance(load_persisted_output, bool),\
212210
'Step {} error, load_persisted_output ' \
213-
'must be bool, got {} instead.'.format(name, type(load_persisted_output))
211+
'must be bool, got {} instead.'.format(self.name, type(load_persisted_output))
214212
assert isinstance(force_fitting, bool), 'Step {} error, force_fitting must be bool, ' \
215-
'got {} instead.'.format(name, type(force_fitting))
213+
'got {} instead.'.format(self.name, type(force_fitting))
216214

217-
logger.info('Initializing Step {}'.format(name))
215+
self._validate_upstream_names()
216+
logger.info('Initializing Step {}'.format(self.name))
218217

219218
self.transformer = transformer
220219
self.output_directory = output_directory
@@ -228,11 +227,7 @@ def __init__(self,
228227
self.force_fitting = force_fitting
229228

230229
self.output = None
231-
self.name = self._apply_suffix(name)
232-
_ALL_STEPS_NAMES.append(self.name)
233-
234230
self.experiment_directory = os.path.join(experiment_directory)
235-
236231
self._prepare_experiment_directories()
237232
self._mode = 'train'
238233

@@ -492,7 +487,7 @@ def get_step_by_name(self, name):
492487
return self.all_upstream_steps[name]
493488
except KeyError as e:
494489
msg = 'No Step with name "{}" found. ' \
495-
'You have following Steps: {}'.format(name, _ALL_STEPS_NAMES)
490+
'You have following Steps: {}'.format(name, list(self.all_upstream_steps.keys()))
496491
raise StepError(msg) from e
497492

498493
def persist_upstream_structure(self):
@@ -525,9 +520,8 @@ def _fit_transform_operation(self, step_inputs):
525520
try:
526521
step_output_data = self.transformer.transform(**step_inputs)
527522
except Exception as e:
528-
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
529-
'Check "Step.transformer" implementation"'.format(self.name,
530-
self.transformer.__class__.__name__)
523+
msg = 'Step {}, Transformer "{}" error ' \
524+
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
531525
raise StepError(msg) from e
532526

533527
logger.info('Step {}, transforming completed'.format(self.name))
@@ -537,9 +531,8 @@ def _fit_transform_operation(self, step_inputs):
537531
try:
538532
step_output_data = self.transformer.fit_transform(**step_inputs)
539533
except Exception as e:
540-
msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \
541-
'Check "Step.transformer" implementation"'.format(self.name,
542-
self.transformer.__class__.__name__)
534+
msg = 'Step {}, Transformer "{}" error ' \
535+
'during "fit_transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
543536
raise StepError(msg) from e
544537

545538
logger.info('Step {}, fitting and transforming completed'.format(self.name))
@@ -552,10 +545,8 @@ def _fit_transform_operation(self, step_inputs):
552545
try:
553546
step_output_data = self.transformer.transform(**step_inputs)
554547
except Exception as e:
555-
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
556-
'This Transformer is not fittable. ' \
557-
'Check "Step.transformer" implementation"'.format(self.name,
558-
self.transformer.__class__.__name__)
548+
msg = 'Step {}, Transformer "{}" error ' \
549+
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
559550
raise StepError(msg) from e
560551

561552
logger.info('Step {}, transforming completed'.format(self.name))
@@ -579,9 +570,8 @@ def _transform_operation(self, step_inputs):
579570
try:
580571
step_output_data = self.transformer.transform(**step_inputs)
581572
except Exception as e:
582-
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
583-
'Check "Step.transformer" implementation"'.format(self.name,
584-
self.transformer.__class__.__name__)
573+
msg = 'Step {}, Transformer "{}" error ' \
574+
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
585575
raise StepError(msg) from e
586576

587577
logger.info('Step {}, transforming completed'.format(self.name))
@@ -595,10 +585,8 @@ def _transform_operation(self, step_inputs):
595585
try:
596586
step_output_data = self.transformer.transform(**step_inputs)
597587
except Exception as e:
598-
msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
599-
'This Transformer is not fittable. ' \
600-
'Check "Step.transformer" implementation"'.format(self.name,
601-
self.transformer.__class__.__name__)
588+
msg = 'Step {}, Transformer "{}" error ' \
589+
'during "transform()" operation.'.format(self.name, self.transformer.__class__.__name__)
602590
raise StepError(msg) from e
603591

604592
logger.info('Step {}, transforming completed'.format(self.name))
@@ -652,6 +640,7 @@ def _prepare_experiment_directories(self):
652640
os.makedirs(os.path.join(self.experiment_directory, dir_name), exist_ok=True)
653641

654642
def _get_steps(self, all_steps):
643+
self._check_name_uniqueness(all_steps=all_steps)
655644
for input_step in self.input_steps:
656645
all_steps = input_step._get_steps(all_steps)
657646
all_steps[self.name] = self
@@ -670,19 +659,16 @@ def _validate_step_name(self, name):
670659
assert isinstance(name, str) or isinstance(name, float) or isinstance(name, int),\
671660
'Step name must be str, float or int. Got {} instead.'.format(type(name))
672661

673-
def _apply_suffix(self, name):
674-
"""returns suffix '_k'
675-
Where 'k' is int that denotes highest increment of step with the same name.
676-
"""
677-
highest_id = 0
678-
for x in _ALL_STEPS_NAMES:
679-
if not x == name:
680-
key_id = x.split('_')[-1]
681-
key_stripped = x[:-len(key_id) - 1]
682-
if key_stripped == name:
683-
if int(key_id) >= highest_id:
684-
highest_id += 1
685-
return '{}_{}'.format(name, highest_id)
662+
def _check_name_uniqueness(self, all_steps):
    """Ensure no *other* Step is already registered under this Step's name.

    Args:
        all_steps (dict): mapping of Step name to Step object collected so
            far while walking the upstream graph.

    Raises:
        ValueError: if ``all_steps`` already holds a different object under
            ``self.name``.
    """
    # _get_steps runs this check on every visit, so a Step that feeds several
    # downstream branches is reached more than once. Meeting the very same
    # object again is not a name clash; only a different object is.
    if self.name in all_steps and all_steps[self.name] is not self:
        raise ValueError('Step with name "{}", already exist. Assign unique Step name.'.format(self.name))
665+
666+
def _validate_upstream_names(self):
    """Resolve the upstream Steps once so that name problems surface early.

    Raises:
        StepError: when reading ``all_upstream_steps`` raises ``ValueError``;
            the original error is chained as the cause.
    """
    try:
        # Only the lookup itself matters here; the resulting keys are discarded.
        self.all_upstream_steps.keys()
    except ValueError as error:
        raise StepError('Incorrect Step names') from error
686672

687673
def _build_structure_dict(self, structure_dict):
688674
for input_step in self.input_steps:

0 commit comments

Comments
 (0)