Commit a4dd273 1 parent e538232 commit a4dd273 Copy full SHA for a4dd273
File tree: 10 files changed, +91 -3 lines changed
transducer_stateless_multi_datasets
10 files changed +91
-3
lines changed Original file line number Diff line number Diff line change @@ -447,6 +447,17 @@ def compute_loss(
447
447
448
448
info ["loss" ] = loss .detach ().cpu ().item ()
449
449
450
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
451
+ info ["utterances" ] = feature .size (0 )
452
+ # averaged input duration in frames over utterances
453
+ info ["utt_duration" ] = supervisions ["num_frames" ].sum ().item ()
454
+ # averaged padding proportion over utterances
455
+ info ["utt_pad_proportion" ] = (
456
+ ((feature .size (1 ) - supervisions ["num_frames" ]) / feature .size (1 ))
457
+ .sum ()
458
+ .item ()
459
+ )
460
+
450
461
return loss , info
451
462
452
463
Original file line number Diff line number Diff line change @@ -605,6 +605,15 @@ def compute_loss(
605
605
# Note: We use reduction=sum while computing the loss.
606
606
info ["loss" ] = loss .detach ().cpu ().item ()
607
607
608
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
609
+ info ["utterances" ] = feature .size (0 )
610
+ # averaged input duration in frames over utterances
611
+ info ["utt_duration" ] = feature_lens .sum ().item ()
612
+ # averaged padding proportion over utterances
613
+ info ["utt_pad_proportion" ] = (
614
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
615
+ )
616
+
608
617
return loss , info
609
618
610
619
Original file line number Diff line number Diff line change @@ -430,6 +430,17 @@ def compute_loss(
430
430
431
431
info ["loss" ] = loss .detach ().cpu ().item ()
432
432
433
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
434
+ info ["utterances" ] = feature .size (0 )
435
+ # averaged input duration in frames over utterances
436
+ info ["utt_duration" ] = supervisions ["num_frames" ].sum ().item ()
437
+ # averaged padding proportion over utterances
438
+ info ["utt_pad_proportion" ] = (
439
+ ((feature .size (1 ) - supervisions ["num_frames" ]) / feature .size (1 ))
440
+ .sum ()
441
+ .item ()
442
+ )
443
+
433
444
return loss , info
434
445
435
446
Original file line number Diff line number Diff line change @@ -349,6 +349,17 @@ def compute_loss(
349
349
info ["frames" ] = supervision_segments [:, 2 ].sum ().item ()
350
350
info ["loss" ] = loss .detach ().cpu ().item ()
351
351
352
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
353
+ info ["utterances" ] = feature .size (0 )
354
+ # averaged input duration in frames over utterances
355
+ info ["utt_duration" ] = supervisions ["num_frames" ].sum ().item ()
356
+ # averaged padding proportion over utterances
357
+ info ["utt_pad_proportion" ] = (
358
+ ((feature .size (2 ) - supervisions ["num_frames" ]) / feature .size (2 ))
359
+ .sum ()
360
+ .item ()
361
+ )
362
+
352
363
return loss , info
353
364
354
365
Original file line number Diff line number Diff line change @@ -403,6 +403,15 @@ def compute_loss(
403
403
# Note: We use reduction=sum while computing the loss.
404
404
info ["loss" ] = loss .detach ().cpu ().item ()
405
405
406
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
407
+ info ["utterances" ] = feature .size (0 )
408
+ # averaged input duration in frames over utterances
409
+ info ["utt_duration" ] = feature_lens .sum ().item ()
410
+ # averaged padding proportion over utterances
411
+ info ["utt_pad_proportion" ] = (
412
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
413
+ )
414
+
406
415
return loss , info
407
416
408
417
Original file line number Diff line number Diff line change @@ -407,6 +407,15 @@ def compute_loss(
407
407
# Note: We use reduction=sum while computing the loss.
408
408
info ["loss" ] = loss .detach ().cpu ().item ()
409
409
410
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
411
+ info ["utterances" ] = feature .size (0 )
412
+ # averaged input duration in frames over utterances
413
+ info ["utt_duration" ] = feature_lens .sum ().item ()
414
+ # averaged padding proportion over utterances
415
+ info ["utt_pad_proportion" ] = (
416
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
417
+ )
418
+
410
419
return loss , info
411
420
412
421
Original file line number Diff line number Diff line change @@ -429,6 +429,15 @@ def compute_loss(
429
429
# Note: We use reduction=sum while computing the loss.
430
430
info ["loss" ] = loss .detach ().cpu ().item ()
431
431
432
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
433
+ info ["utterances" ] = feature .size (0 )
434
+ # averaged input duration in frames over utterances
435
+ info ["utt_duration" ] = feature_lens .sum ().item ()
436
+ # averaged padding proportion over utterances
437
+ info ["utt_pad_proportion" ] = (
438
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
439
+ )
440
+
432
441
return loss , info
433
442
434
443
Original file line number Diff line number Diff line change @@ -417,6 +417,15 @@ def compute_loss(
417
417
# Note: We use reduction=sum while computing the loss.
418
418
info ["loss" ] = loss .detach ().cpu ().item ()
419
419
420
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
421
+ info ["utterances" ] = feature .size (0 )
422
+ # averaged input duration in frames over utterances
423
+ info ["utt_duration" ] = feature_lens .sum ().item ()
424
+ # averaged padding proportion over utterances
425
+ info ["utt_pad_proportion" ] = (
426
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
427
+ )
428
+
420
429
return loss , info
421
430
422
431
Original file line number Diff line number Diff line change @@ -476,6 +476,15 @@ def compute_loss(
476
476
# Note: We use reduction=sum while computing the loss.
477
477
info ["loss" ] = loss .detach ().cpu ().item ()
478
478
479
+ # `utt_duration` and `utt_pad_proportion` are stored as sums over the utterances here and would be normalized by `utterances` # noqa
480
+ info ["utterances" ] = feature .size (0 )
481
+ # averaged input duration in frames over utterances
482
+ info ["utt_duration" ] = feature_lens .sum ().item ()
483
+ # averaged padding proportion over utterances
484
+ info ["utt_pad_proportion" ] = (
485
+ ((feature .size (1 ) - feature_lens ) / feature .size (1 )).sum ().item ()
486
+ )
487
+
479
488
return loss , info
480
489
481
490
Original file line number Diff line number Diff line change @@ -544,9 +544,10 @@ def __str__(self) -> str:
544
544
else :
545
545
raise ValueError (f"Unexpected key: { k } " )
546
546
frames = "%.2f" % self ["frames" ]
547
- ans_frames += "over " + str (frames ) + " frames; "
548
- utterances = "%.2f" % self ["utterances" ]
549
- ans_utterances += "over " + str (utterances ) + " utterances."
547
+ ans_frames += "over " + str (frames ) + " frames. "
548
+ if ans_utterances != "" :
549
+ utterances = "%.2f" % self ["utterances" ]
550
+ ans_utterances += "over " + str (utterances ) + " utterances."
550
551
551
552
return ans_frames + ans_utterances
552
553
You can’t perform that action at this time.
0 commit comments