 from billiard import Process, Queue, Pool

 from dataflows.processors.dumpers.dumper_base import DumperBase
-from dataflows.processors.dumpers.file_formats import CSVFormat, JSONFormat, FileFormat
+from dataflows.processors.dumpers.formats import CSVFormat, JSONFormat, FileFormat
 from bcodmo_frictionless.bcodmo_pipeline_processors.helper import (
     get_redis_progress_key,
     get_redis_progress_resource_key,
@@ -25,6 +25,7 @@
     REDIS_PROGRESS_SAVING_DONE_FLAG,
     REDIS_EXPIRES,
 )
+from bcodmo_frictionless.bcodmo_pipeline_processors.helper import get_missing_values

 WINDOWS_LINE_ENDING = b"\r\n"
 UNIX_LINE_ENDING = b"\n"
@@ -543,63 +544,33 @@ def rows_processor(self, resource, writer, stream):
         row_number = None
         upload_id = None

-        writer_timer_sum = 0
-        process_timer_sum = 0
-        process_timer_count = 0
-        process_timer = None
-        redis_timer_sum = 0
-
         try:
             row_number = 0
             part_number = 0
-            start = time.time()
             timer = time.time()

-            yield_start = time.time()
-            yield_total = 0
-            async_total = 0
-            loop_total = 0
             for row in resource:
-                loop_total += time.time() - yield_start
-
                 row_number += 1

-                writer_timer = time.time()
                 writer.write_row(row)
-                writer_timer_sum += time.time() - writer_timer

-                redis_timer = time.time()
                 if redis_conn is not None and time.time() - timer > 0.75:
                     redis_conn.set(progress_key, row_number, ex=REDIS_EXPIRES)
                     timer = time.time()
-                redis_timer_sum += time.time() - redis_timer

-                async_start = time.time()
-                if row_number % 25 == 0 and stream.tell() > calculate_partsize(
+                if row_number % 100 == 0 and stream.tell() > calculate_partsize(
                     part_number
                 ):
                     part_number, upload_id, writer, stream = self.async_write_part(
                         stream, resource, part_number, object_key, upload_id, False
                     )
-                async_total += time.time() - async_start
-
-                yield_total += time.time() - yield_start
-                if (row_number + 1) % 10000 == 0:
-                    # print(
-                    #     f"total {yield_total}. async {async_total}. redis {redis_timer_sum}. writer {writer_timer_sum}. loop {loop_total}"
-                    # )
-                    yield_total = 0
-                    async_total = 0
-                    redis_timer_sum = 0
-                    writer_timer_sum = 0
-                    loop_total = 0
+
                 if (
                     self.limit_yield is None
                     or self.limit_yield < 0
                     or row_number <= self.limit_yield
                 ):
                     yield row
-                    yield_start = time.time()
             # Set row number values
             DumperBase.inc_attr(
                 self.datapackage.descriptor, self.datapackage_rowcount, row_number
@@ -613,6 +584,7 @@ def rows_processor(self, resource, writer, stream):
             row_number = None
             writer.finalize_file()
             # Upload final part
+            print("Starting last upload")
             part_number, _, _, stream = self.async_write_part(
                 stream, resource, part_number, object_key, upload_id, True
             )
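
For readers skimming the hunk at old line 543: the loop keeps two throttles, one for progress reporting (a Redis SET at most every 0.75 s) and one for the multipart upload (the buffered stream is flushed as a new part only when the row counter hits a multiple of 100, raised from 25 in this change, and the buffer has grown past the part-size threshold). Below is a minimal, self-contained sketch of that pattern under stated assumptions; dump_rows, write_row, flush_part and the constant values are illustrative stand-ins, not the project's actual async_write_part/calculate_partsize implementations.

import io
import time

# Assumed values for illustration; the real constants live in the project's helper module.
REDIS_EXPIRES = 60 * 60          # progress keys expire after an hour (assumption)
MIN_PART_SIZE = 5 * 1024 * 1024  # S3 multipart parts must be at least 5 MiB (except the last)


def calculate_partsize(part_number):
    # Stand-in for the helper referenced in the diff; the real one may vary by part number.
    return MIN_PART_SIZE


def dump_rows(rows, write_row, flush_part, redis_conn=None, progress_key=None):
    """Yield rows while buffering them for a multipart upload (illustrative only).

    - Progress is written to Redis at most once every 0.75 seconds.
    - The buffer is flushed as an upload part only when row_number is a
      multiple of 100 AND the buffer exceeds the part-size threshold, so
      stream.tell() is not consulted on every row.
    """
    stream = io.BytesIO()
    part_number = 0
    row_number = 0
    timer = time.time()

    for row in rows:
        row_number += 1
        write_row(stream, row)

        # Throttled progress reporting: one Redis SET per ~0.75 s, not per row.
        if redis_conn is not None and time.time() - timer > 0.75:
            redis_conn.set(progress_key, row_number, ex=REDIS_EXPIRES)
            timer = time.time()

        # Periodic part flush; this change checks every 100 rows instead of 25.
        if row_number % 100 == 0 and stream.tell() > calculate_partsize(part_number):
            part_number = flush_part(stream, part_number, last=False)
            stream = io.BytesIO()

        yield row

    # Upload whatever is left as the final (possibly short) part.
    flush_part(stream, part_number, last=True)

Checking stream.tell() only on every 100th row amortizes the size check, and the 0.75 s timer keeps progress updates to roughly one Redis round trip per second regardless of row throughput.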