18
18
'load_persisted_output' : False
19
19
}
20
20
21
# Module-level registry of Step names. Step.__init__ appends each newly
# suffixed name to it, Step._apply_suffix scans it to pick the next suffix,
# and Step.get_step_by_name prints it in its "No Step with name" error message.
+ _ALL_STEPS_NAMES = list ()
22
+
21
23
22
24
class Step :
23
25
"""Step is a building block of steppy pipelines.
@@ -178,42 +180,41 @@ def __init__(self,
178
180
cache_output = False ,
179
181
load_persisted_output = False ):
180
182
181
- self . name = self ._format_step_name (name , transformer )
183
+ name = self ._format_step_name (name , transformer )
182
184
183
185
if experiment_directory is not None :
184
186
assert isinstance (experiment_directory , str ),\
185
187
'Step {} error, experiment_directory must ' \
186
- 'be str, got {} instead.' .format (self . name , type (experiment_directory ))
188
+ 'be str, got {} instead.' .format (name , type (experiment_directory ))
187
189
else :
188
190
experiment_directory = os .path .join (os .path .expanduser ("~" ), '.steppy' )
189
191
logger .info ('Using default experiment directory: {}' .format (experiment_directory ))
190
192
191
193
if output_directory is not None :
192
194
assert isinstance (output_directory , str ),\
193
- 'Step {}, output_directory must be str, got {} instead' .format (self . name , type (output_directory ))
195
+ 'Step {}, output_directory must be str, got {} instead' .format (name , type (output_directory ))
194
196
195
197
if input_data is not None :
196
198
assert isinstance (input_data , list ), 'Step {} error, input_data must be list, ' \
197
- 'got {} instead.' .format (self . name , type (input_data ))
199
+ 'got {} instead.' .format (name , type (input_data ))
198
200
if input_steps is not None :
199
201
assert isinstance (input_steps , list ), 'Step {} error, input_steps must be list, ' \
200
- 'got {} instead.' .format (self . name , type (input_steps ))
202
+ 'got {} instead.' .format (name , type (input_steps ))
201
203
if adapter is not None :
202
204
assert isinstance (adapter , Adapter ), 'Step {} error, adapter must be an instance ' \
203
- 'of {}' .format (self . name , str (Adapter ))
205
+ 'of {}' .format (name , str (Adapter ))
204
206
205
207
assert isinstance (cache_output , bool ), 'Step {} error, cache_output must be bool, ' \
206
- 'got {} instead.' .format (self . name , type (cache_output ))
208
+ 'got {} instead.' .format (name , type (cache_output ))
207
209
assert isinstance (persist_output , bool ), 'Step {} error, persist_output must be bool, ' \
208
- 'got {} instead.' .format (self . name , type (persist_output ))
210
+ 'got {} instead.' .format (name , type (persist_output ))
209
211
assert isinstance (load_persisted_output , bool ),\
210
212
'Step {} error, load_persisted_output ' \
211
- 'must be bool, got {} instead.' .format (self . name , type (load_persisted_output ))
213
+ 'must be bool, got {} instead.' .format (name , type (load_persisted_output ))
212
214
assert isinstance (force_fitting , bool ), 'Step {} error, force_fitting must be bool, ' \
213
- 'got {} instead.' .format (self . name , type (force_fitting ))
215
+ 'got {} instead.' .format (name , type (force_fitting ))
214
216
215
- self ._validate_upstream_names ()
216
- logger .info ('Initializing Step {}' .format (self .name ))
217
+ logger .info ('Initializing Step {}' .format (name ))
217
218
218
219
self .transformer = transformer
219
220
self .output_directory = output_directory
@@ -227,7 +228,11 @@ def __init__(self,
227
228
self .force_fitting = force_fitting
228
229
229
230
self .output = None
231
+ self .name = self ._apply_suffix (name )
232
+ _ALL_STEPS_NAMES .append (self .name )
233
+
230
234
self .experiment_directory = os .path .join (experiment_directory )
235
+
231
236
self ._prepare_experiment_directories ()
232
237
self ._mode = 'train'
233
238
@@ -487,7 +492,7 @@ def get_step_by_name(self, name):
487
492
return self .all_upstream_steps [name ]
488
493
except KeyError as e :
489
494
msg = 'No Step with name "{}" found. ' \
490
- 'You have following Steps: {}' .format (name , list ( self . all_upstream_steps . keys ()) )
495
+ 'You have following Steps: {}' .format (name , _ALL_STEPS_NAMES )
491
496
raise StepError (msg ) from e
492
497
493
498
def persist_upstream_structure (self ):
@@ -520,8 +525,9 @@ def _fit_transform_operation(self, step_inputs):
520
525
try :
521
526
step_output_data = self .transformer .transform (** step_inputs )
522
527
except Exception as e :
523
- msg = 'Step {}, Transformer "{}" error ' \
524
- 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
528
+ msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
529
+ 'Check "Step.transformer" implementation"' .format (self .name ,
530
+ self .transformer .__class__ .__name__ )
525
531
raise StepError (msg ) from e
526
532
527
533
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -531,8 +537,9 @@ def _fit_transform_operation(self, step_inputs):
531
537
try :
532
538
step_output_data = self .transformer .fit_transform (** step_inputs )
533
539
except Exception as e :
534
- msg = 'Step {}, Transformer "{}" error ' \
535
- 'during "fit_transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
540
+ msg = 'Step {}, Transformer "{}" error during "fit_transform()" operation. ' \
541
+ 'Check "Step.transformer" implementation"' .format (self .name ,
542
+ self .transformer .__class__ .__name__ )
536
543
raise StepError (msg ) from e
537
544
538
545
logger .info ('Step {}, fitting and transforming completed' .format (self .name ))
@@ -545,8 +552,10 @@ def _fit_transform_operation(self, step_inputs):
545
552
try :
546
553
step_output_data = self .transformer .transform (** step_inputs )
547
554
except Exception as e :
548
- msg = 'Step {}, Transformer "{}" error ' \
549
- 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
555
+ msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
556
+ 'This Transformer is not fittable. ' \
557
+ 'Check "Step.transformer" implementation"' .format (self .name ,
558
+ self .transformer .__class__ .__name__ )
550
559
raise StepError (msg ) from e
551
560
552
561
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -570,8 +579,9 @@ def _transform_operation(self, step_inputs):
570
579
try :
571
580
step_output_data = self .transformer .transform (** step_inputs )
572
581
except Exception as e :
573
- msg = 'Step {}, Transformer "{}" error ' \
574
- 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
582
+ msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
583
+ 'Check "Step.transformer" implementation"' .format (self .name ,
584
+ self .transformer .__class__ .__name__ )
575
585
raise StepError (msg ) from e
576
586
577
587
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -585,8 +595,10 @@ def _transform_operation(self, step_inputs):
585
595
try :
586
596
step_output_data = self .transformer .transform (** step_inputs )
587
597
except Exception as e :
588
- msg = 'Step {}, Transformer "{}" error ' \
589
- 'during "transform()" operation.' .format (self .name , self .transformer .__class__ .__name__ )
598
+ msg = 'Step {}, Transformer "{}" error during "transform()" operation. ' \
599
+ 'This Transformer is not fittable. ' \
600
+ 'Check "Step.transformer" implementation"' .format (self .name ,
601
+ self .transformer .__class__ .__name__ )
590
602
raise StepError (msg ) from e
591
603
592
604
logger .info ('Step {}, transforming completed' .format (self .name ))
@@ -640,7 +652,6 @@ def _prepare_experiment_directories(self):
640
652
os .makedirs (os .path .join (self .experiment_directory , dir_name ), exist_ok = True )
641
653
642
654
def _get_steps (self , all_steps ):
643
- self ._check_name_uniqueness (all_steps = all_steps )
644
655
for input_step in self .input_steps :
645
656
all_steps = input_step ._get_steps (all_steps )
646
657
all_steps [self .name ] = self
@@ -659,16 +670,19 @@ def _validate_step_name(self, name):
659
670
assert isinstance (name , str ) or isinstance (name , float ) or isinstance (name , int ),\
660
671
'Step name must be str, float or int. Got {} instead.' .format (type (name ))
661
672
662
- def _check_name_uniqueness (self , all_steps ):
663
- if self .name in all_steps .keys ():
664
- raise ValueError ('Step with name "{}", already exist. Assign unique Step name.' .format (self .name ))
665
-
666
- def _validate_upstream_names (self ):
667
- try :
668
- _ = self .all_upstream_steps .keys ()
669
- except ValueError as e :
670
- msg = 'Incorrect Step names'
671
- raise StepError (msg ) from e
673
def _apply_suffix(self, name):
    """Return ``name`` with a numeric suffix ``'_k'`` appended.

    ``k`` is one greater than the highest index already registered in
    ``_ALL_STEPS_NAMES`` for the same base name, or ``0`` when no entry
    of the form ``'<name>_<int>'`` is registered yet.

    Args:
        name: base name of the step (already formatted).

    Returns:
        str: ``'<name>_<k>'``.
    """
    next_id = 0
    for registered in _ALL_STEPS_NAMES:
        # Split on the LAST underscore only, so base names that themselves
        # contain underscores are compared as a whole.
        base, separator, index = registered.rpartition('_')
        if not separator or base != name:
            continue
        # Entries whose trailing segment is not a plain integer cannot be
        # an earlier increment of this name; skip them instead of letting
        # int() raise.
        if not index.isdecimal():
            continue
        # Use max+1 rather than counting matches, so the result does not
        # depend on the order of (or gaps in) the registry.
        next_id = max(next_id, int(index) + 1)
    return '{}_{}'.format(name, next_id)
672
686
673
687
def _build_structure_dict (self , structure_dict ):
674
688
for input_step in self .input_steps :
0 commit comments