Skip to content

Commit a4dd273

Browse files
authored
fix about tensorboard (#516)
* fix metricstracker
* fix style
1 parent e538232 commit a4dd273

File tree

10 files changed

+91
-3
lines changed

10 files changed

+91
-3
lines changed

egs/librispeech/ASR/conformer_ctc/train.py

+11
Original file line number | Diff line number | Diff line change
@@ -447,6 +447,17 @@ def compute_loss(
447447

448448
info["loss"] = loss.detach().cpu().item()
449449

450+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
451+
info["utterances"] = feature.size(0)
452+
# averaged input duration in frames over utterances
453+
info["utt_duration"] = supervisions["num_frames"].sum().item()
454+
# averaged padding proportion over utterances
455+
info["utt_pad_proportion"] = (
456+
((feature.size(1) - supervisions["num_frames"]) / feature.size(1))
457+
.sum()
458+
.item()
459+
)
460+
450461
return loss, info
451462

452463

egs/librispeech/ASR/conformer_ctc2/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -605,6 +605,15 @@ def compute_loss(
605605
# Note: We use reduction=sum while computing the loss.
606606
info["loss"] = loss.detach().cpu().item()
607607

608+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
609+
info["utterances"] = feature.size(0)
610+
# averaged input duration in frames over utterances
611+
info["utt_duration"] = feature_lens.sum().item()
612+
# averaged padding proportion over utterances
613+
info["utt_pad_proportion"] = (
614+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
615+
)
616+
608617
return loss, info
609618

610619

egs/librispeech/ASR/streaming_conformer_ctc/train.py

+11
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,17 @@ def compute_loss(
430430

431431
info["loss"] = loss.detach().cpu().item()
432432

433+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
434+
info["utterances"] = feature.size(0)
435+
# averaged input duration in frames over utterances
436+
info["utt_duration"] = supervisions["num_frames"].sum().item()
437+
# averaged padding proportion over utterances
438+
info["utt_pad_proportion"] = (
439+
((feature.size(1) - supervisions["num_frames"]) / feature.size(1))
440+
.sum()
441+
.item()
442+
)
443+
433444
return loss, info
434445

435446

egs/librispeech/ASR/tdnn_lstm_ctc/train.py

+11
Original file line number | Diff line number | Diff line change
@@ -349,6 +349,17 @@ def compute_loss(
349349
info["frames"] = supervision_segments[:, 2].sum().item()
350350
info["loss"] = loss.detach().cpu().item()
351351

352+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
353+
info["utterances"] = feature.size(0)
354+
# averaged input duration in frames over utterances
355+
info["utt_duration"] = supervisions["num_frames"].sum().item()
356+
# averaged padding proportion over utterances
357+
info["utt_pad_proportion"] = (
358+
((feature.size(2) - supervisions["num_frames"]) / feature.size(2))
359+
.sum()
360+
.item()
361+
)
362+
352363
return loss, info
353364

354365

egs/librispeech/ASR/transducer/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -403,6 +403,15 @@ def compute_loss(
403403
# Note: We use reduction=sum while computing the loss.
404404
info["loss"] = loss.detach().cpu().item()
405405

406+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
407+
info["utterances"] = feature.size(0)
408+
# averaged input duration in frames over utterances
409+
info["utt_duration"] = feature_lens.sum().item()
410+
# averaged padding proportion over utterances
411+
info["utt_pad_proportion"] = (
412+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
413+
)
414+
406415
return loss, info
407416

408417

egs/librispeech/ASR/transducer_lstm/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -407,6 +407,15 @@ def compute_loss(
407407
# Note: We use reduction=sum while computing the loss.
408408
info["loss"] = loss.detach().cpu().item()
409409

410+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
411+
info["utterances"] = feature.size(0)
412+
# averaged input duration in frames over utterances
413+
info["utt_duration"] = feature_lens.sum().item()
414+
# averaged padding proportion over utterances
415+
info["utt_pad_proportion"] = (
416+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
417+
)
418+
410419
return loss, info
411420

412421

egs/librispeech/ASR/transducer_stateless/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -429,6 +429,15 @@ def compute_loss(
429429
# Note: We use reduction=sum while computing the loss.
430430
info["loss"] = loss.detach().cpu().item()
431431

432+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
433+
info["utterances"] = feature.size(0)
434+
# averaged input duration in frames over utterances
435+
info["utt_duration"] = feature_lens.sum().item()
436+
# averaged padding proportion over utterances
437+
info["utt_pad_proportion"] = (
438+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
439+
)
440+
432441
return loss, info
433442

434443

egs/librispeech/ASR/transducer_stateless2/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -417,6 +417,15 @@ def compute_loss(
417417
# Note: We use reduction=sum while computing the loss.
418418
info["loss"] = loss.detach().cpu().item()
419419

420+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
421+
info["utterances"] = feature.size(0)
422+
# averaged input duration in frames over utterances
423+
info["utt_duration"] = feature_lens.sum().item()
424+
# averaged padding proportion over utterances
425+
info["utt_pad_proportion"] = (
426+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
427+
)
428+
420429
return loss, info
421430

422431

egs/librispeech/ASR/transducer_stateless_multi_datasets/train.py

+9
Original file line number | Diff line number | Diff line change
@@ -476,6 +476,15 @@ def compute_loss(
476476
# Note: We use reduction=sum while computing the loss.
477477
info["loss"] = loss.detach().cpu().item()
478478

479+
# `utt_duration` and `utt_pad_proportion` would be normalized by `utterances` # noqa
480+
info["utterances"] = feature.size(0)
481+
# averaged input duration in frames over utterances
482+
info["utt_duration"] = feature_lens.sum().item()
483+
# averaged padding proportion over utterances
484+
info["utt_pad_proportion"] = (
485+
((feature.size(1) - feature_lens) / feature.size(1)).sum().item()
486+
)
487+
479488
return loss, info
480489

481490

icefall/utils.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -544,9 +544,10 @@ def __str__(self) -> str:
544544
else:
545545
raise ValueError(f"Unexpected key: {k}")
546546
frames = "%.2f" % self["frames"]
547-
ans_frames += "over " + str(frames) + " frames; "
548-
utterances = "%.2f" % self["utterances"]
549-
ans_utterances += "over " + str(utterances) + " utterances."
547+
ans_frames += "over " + str(frames) + " frames. "
548+
if ans_utterances != "":
549+
utterances = "%.2f" % self["utterances"]
550+
ans_utterances += "over " + str(utterances) + " utterances."
550551

551552
return ans_frames + ans_utterances
552553

0 commit comments

Comments
 (0)