18
18
'load_persisted_output' : False
19
19
}
20
20
21
- _ALL_STEPS_NAMES = list ()
22
-
23
21
24
22
class Step :
25
23
"""Step is a building block of steppy pipelines.
@@ -180,41 +178,42 @@ def __init__(self,
180
178
cache_output = False ,
181
179
load_persisted_output = False ):
182
180
183
- name = self ._format_step_name (name , transformer )
181
+ self . name = self ._format_step_name (name , transformer )
184
182
185
183
if experiment_directory is not None :
186
184
assert isinstance (experiment_directory , str ),\
187
185
'Step {} error, experiment_directory must ' \
188
- 'be str, got {} instead.' .format (name , type (experiment_directory ))
186
+ 'be str, got {} instead.' .format (self . name , type (experiment_directory ))
189
187
else :
190
188
experiment_directory = os .path .join (os .path .expanduser ("~" ), '.steppy' )
191
189
logger .info ('Using default experiment directory: {}' .format (experiment_directory ))
192
190
193
191
if output_directory is not None :
194
192
assert isinstance (output_directory , str ),\
195
- 'Step {}, output_directory must be str, got {} instead' .format (name , type (output_directory ))
193
+ 'Step {}, output_directory must be str, got {} instead' .format (self . name , type (output_directory ))
196
194
197
195
if input_data is not None :
198
196
assert isinstance (input_data , list ), 'Step {} error, input_data must be list, ' \
199
- 'got {} instead.' .format (name , type (input_data ))
197
+ 'got {} instead.' .format (self . name , type (input_data ))
200
198
if input_steps is not None :
201
199
assert isinstance (input_steps , list ), 'Step {} error, input_steps must be list, ' \
202
- 'got {} instead.' .format (name , type (input_steps ))
200
+ 'got {} instead.' .format (self . name , type (input_steps ))
203
201
if adapter is not None :
204
202
assert isinstance (adapter , Adapter ), 'Step {} error, adapter must be an instance ' \
205
- 'of {}' .format (name , str (Adapter ))
203
+ 'of {}' .format (self . name , str (Adapter ))
206
204
207
205
assert isinstance (cache_output , bool ), 'Step {} error, cache_output must be bool, ' \
208
- 'got {} instead.' .format (name , type (cache_output ))
206
+ 'got {} instead.' .format (self . name , type (cache_output ))
209
207
assert isinstance (persist_output , bool ), 'Step {} error, persist_output must be bool, ' \
210
- 'got {} instead.' .format (name , type (persist_output ))
208
+ 'got {} instead.' .format (self . name , type (persist_output ))
211
209
assert isinstance (load_persisted_output , bool ),\
212
210
'Step {} error, load_persisted_output ' \
213
- 'must be bool, got {} instead.' .format (name , type (load_persisted_output ))
211
+ 'must be bool, got {} instead.' .format (self . name , type (load_persisted_output ))
214
212
assert isinstance (force_fitting , bool ), 'Step {} error, force_fitting must be bool, ' \
215
- 'got {} instead.' .format (name , type (force_fitting ))
213
+ 'got {} instead.' .format (self . name , type (force_fitting ))
216
214
217
- logger .info ('Initializing Step {}' .format (name ))
215
+ self ._validate_upstream_names ()
216
+ logger .info ('Initializing Step {}' .format (self .name ))
218
217
219
218
self .transformer = transformer
220
219
self .output_directory = output_directory
@@ -228,11 +227,7 @@ def __init__(self,
228
227
self .force_fitting = force_fitting
229
228
230
229
self .output = None
231
- self .name = self ._apply_suffix (name )
232
- _ALL_STEPS_NAMES .append (self .name )
233
-
234
230
self .experiment_directory = os .path .join (experiment_directory )
235
-
236
231
self ._prepare_experiment_directories ()
237
232
self ._mode = 'train'
238
233
@@ -492,7 +487,7 @@ def get_step_by_name(self, name):
492
487
return self .all_upstream_steps [name ]
493
488
except KeyError as e :
494
489
msg = 'No Step with name "{}" found. ' \
495
- 'You have following Steps: {}' .format (name , _ALL_STEPS_NAMES )
490
+ 'You have following Steps: {}' .format (name , list ( self . all_upstream_steps . keys ()) )
496
491
raise StepError (msg ) from e
497
492
498
493
def persist_upstream_structure (self ):
@@ -525,9 +520,8 @@ def _fit_transform_operation(self, step_inputs):
525
520
try :
526
521
step_output_data = self .transformer .transform (** step_inputs )
527
522
except Exception as e :
528
- msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
529
- 'Check "Step.transformer" implementation"' .format (self .name ,
530
- self .transformer .__class__ .__name__ )
523
+ msg = 'Step {}, Transformer "{}" error ' \
524
+ 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
531
525
raise StepError (msg ) from e
532
526
533
527
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -537,9 +531,8 @@ def _fit_transform_operation(self, step_inputs):
537
531
try :
538
532
step_output_data = self .transformer .fit_transform (** step_inputs )
539
533
except Exception as e :
540
- msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \
541
- 'Check "Step.transformer" implementation"' .format (self .name ,
542
- self .transformer .__class__ .__name__ )
534
+ msg = 'Step {}, Transformer "{}" error ' \
535
+ 'during "fit_transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
543
536
raise StepError (msg ) from e
544
537
545
538
logger .info ('Step {}, fitting and transforming completed' .format (self .name ))
@@ -552,10 +545,8 @@ def _fit_transform_operation(self, step_inputs):
552
545
try :
553
546
step_output_data = self .transformer .transform (** step_inputs )
554
547
except Exception as e :
555
- msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
556
- 'This Transformer is not fittable. ' \
557
- 'Check "Step.transformer" implementation"' .format (self .name ,
558
- self .transformer .__class__ .__name__ )
548
+ msg = 'Step {}, Transformer "{}" error ' \
549
+ 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
559
550
raise StepError (msg ) from e
560
551
561
552
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -579,9 +570,8 @@ def _transform_operation(self, step_inputs):
579
570
try :
580
571
step_output_data = self .transformer .transform (** step_inputs )
581
572
except Exception as e :
582
- msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
583
- 'Check "Step.transformer" implementation"' .format (self .name ,
584
- self .transformer .__class__ .__name__ )
573
+ msg = 'Step {}, Transformer "{}" error ' \
574
+ 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
585
575
raise StepError (msg ) from e
586
576
587
577
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -595,10 +585,8 @@ def _transform_operation(self, step_inputs):
595
585
try :
596
586
step_output_data = self .transformer .transform (** step_inputs )
597
587
except Exception as e :
598
- msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
599
- 'This Transformer is not fittable. ' \
600
- 'Check "Step.transformer" implementation"' .format (self .name ,
601
- self .transformer .__class__ .__name__ )
588
+ msg = 'Step {}, Transformer "{}" error ' \
589
+ 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
602
590
raise StepError (msg ) from e
603
591
604
592
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -652,6 +640,7 @@ def _prepare_experiment_directories(self):
652
640
os .makedirs (os .path .join (self .experiment_directory , dir_name ), exist_ok = True )
653
641
654
642
def _get_steps (self , all_steps ):
643
+ self ._check_name_uniqueness (all_steps = all_steps )
655
644
for input_step in self .input_steps :
656
645
all_steps = input_step ._get_steps (all_steps )
657
646
all_steps [self .name ] = self
@@ -670,19 +659,16 @@ def _validate_step_name(self, name):
670
659
assert isinstance (name , str ) or isinstance (name , float ) or isinstance (name , int ),\
671
660
'Step name must be str, float or int. Got {} instead.' .format (type (name ))
672
661
673
- def _apply_suffix (self , name ):
674
- """returns suffix '_k'
675
- Where 'k' is int that denotes highest increment of step with the same name.
676
- """
677
- highest_id = 0
678
- for x in _ALL_STEPS_NAMES :
679
- if not x == name :
680
- key_id = x .split ('_' )[- 1 ]
681
- key_stripped = x [:- len (key_id ) - 1 ]
682
- if key_stripped == name :
683
- if int (key_id ) >= highest_id :
684
- highest_id += 1
685
- return '{}_{}' .format (name , highest_id )
662
+ def _check_name_uniqueness (self , all_steps ):
663
+ if self .name in all_steps .keys ():
664
+ raise ValueError ('Step with name "{}", already exist. Assign unique Step name.' .format (self .name ))
665
+
666
+ def _validate_upstream_names (self ):
667
+ try :
668
+ _ = self .all_upstream_steps .keys ()
669
+ except ValueError as e :
670
+ msg = 'Incorrect Step names'
671
+ raise StepError (msg ) from e
686
672
687
673
def _build_structure_dict (self , structure_dict ):
688
674
for input_step in self .input_steps :
0 commit comments