@@ -655,8 +655,14 @@ def _accept_all(key):
655
655
return True
656
656
657
657
658
- def iter_bucket (bucket_name , prefix = '' , accept_key = None ,
659
- key_limit = None , workers = 16 , retries = 3 ):
658
+ def iter_bucket (
659
+ bucket_name ,
660
+ prefix = '' ,
661
+ accept_key = None ,
662
+ key_limit = None ,
663
+ workers = 16 ,
664
+ retries = 3 ,
665
+ ** session_kwargs ):
660
666
"""
661
667
Iterate and download all S3 objects under `s3://bucket_name/prefix`.
662
668
@@ -676,6 +682,11 @@ def iter_bucket(bucket_name, prefix='', accept_key=None,
676
682
The number of subprocesses to use.
677
683
retries: int, optional
678
684
The number of times to retry a failed download.
685
+ session_kwargs: dict, optional
686
+ Keyword arguments to pass when creating a new session.
687
+ For a list of available names and values, see:
688
+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html#boto3.session.Session
689
+
679
690
680
691
Yields
681
692
------
@@ -716,8 +727,16 @@ def iter_bucket(bucket_name, prefix='', accept_key=None,
716
727
pass
717
728
718
729
total_size , key_no = 0 , - 1
719
- key_iterator = _list_bucket (bucket_name , prefix = prefix , accept_key = accept_key )
720
- download_key = functools .partial (_download_key , bucket_name = bucket_name , retries = retries )
730
+ key_iterator = _list_bucket (
731
+ bucket_name ,
732
+ prefix = prefix ,
733
+ accept_key = accept_key ,
734
+ ** session_kwargs )
735
+ download_key = functools .partial (
736
+ _download_key ,
737
+ bucket_name = bucket_name ,
738
+ retries = retries ,
739
+ ** session_kwargs )
721
740
722
741
with _create_process_pool (processes = workers ) as pool :
723
742
result_iterator = pool .imap_unordered (download_key , key_iterator )
@@ -736,8 +755,13 @@ def iter_bucket(bucket_name, prefix='', accept_key=None,
736
755
logger .info ("processed %i keys, total size %i" % (key_no + 1 , total_size ))
737
756
738
757
739
- def _list_bucket (bucket_name , prefix = '' , accept_key = lambda k : True ):
740
- client = boto3 .client ('s3' )
758
+ def _list_bucket (
759
+ bucket_name ,
760
+ prefix = '' ,
761
+ accept_key = lambda k : True ,
762
+ ** session_kwargs ):
763
+ session = boto3 .session .Session (** session_kwargs )
764
+ client = session .client ('s3' )
741
765
ctoken = None
742
766
743
767
while True :
@@ -762,14 +786,14 @@ def _list_bucket(bucket_name, prefix='', accept_key=lambda k: True):
762
786
break
763
787
764
788
765
- def _download_key (key_name , bucket_name = None , retries = 3 ):
789
+ def _download_key (key_name , bucket_name = None , retries = 3 , ** session_kwargs ):
766
790
if bucket_name is None :
767
791
raise ValueError ('bucket_name may not be None' )
768
792
769
793
#
770
794
# https://geekpete.com/blog/multithreading-boto3/
771
795
#
772
- session = boto3 .session .Session ()
796
+ session = boto3 .session .Session (** session_kwargs )
773
797
s3 = session .resource ('s3' )
774
798
bucket = s3 .Bucket (bucket_name )
775
799
0 commit comments