Skip to content

Commit 2dc4d43

Browse files
committed
Fix for matcha models from icefall
1 parent 7315226 commit 2dc4d43

5 files changed

+14
-30
lines changed

sherpa-onnx/csrc/jieba-lexicon.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ class JiebaLexicon::Impl {
8383
std::vector<TokenIDs> ans;
8484
std::vector<int64_t> this_sentence;
8585

86-
int32_t blank = token2id_.at(" ");
8786
for (const auto &w : words) {
8887
auto ids = ConvertWordToIds(w);
8988
if (ids.empty()) {
@@ -92,7 +91,6 @@ class JiebaLexicon::Impl {
9291
}
9392

9493
this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
95-
// this_sentence.push_back(blank);
9694

9795
if (w == "。" || w == "！" || w == "？" || w == "，") {
9896
ans.emplace_back(std::move(this_sentence));
@@ -134,7 +132,9 @@ class JiebaLexicon::Impl {
134132
token2id_ = ReadTokens(is);
135133

136134
std::vector<std::pair<std::string, std::string>> puncts = {
137-
{",", "，"}, {".", "。"}, {"!", "！"}, {"?", "？"}};
135+
{",", "，"}, {".", "。"}, {"!", "！"}, {"?", "？"}, {":", "："},
136+
{"\"", "“"}, {"\"", "”"}, {"'", "‘"}, {"'", "’"},
137+
};
138138

139139
for (const auto &p : puncts) {
140140
if (token2id_.count(p.first) && !token2id_.count(p.second)) {

sherpa-onnx/csrc/offline-tts-matcha-impl.h

+2-22
Original file line numberDiff line numberDiff line change
@@ -248,28 +248,8 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
248248
SHERPA_ONNX_LOGE("%s", os.str().c_str());
249249
}
250250

251-
if (meta_data.add_blank) {
252-
for (auto &k : x) {
253-
k = AddBlank(k);
254-
}
255-
256-
if (config_.model.debug) {
257-
std::ostringstream os;
258-
os << "\n";
259-
for (const auto &k : x) {
260-
for (int32_t i : k) {
261-
os << i << " ";
262-
}
263-
os << "\n";
264-
}
265-
os << "\n";
266-
SHERPA_ONNX_LOGE("%s", os.str().c_str());
267-
}
268-
269-
for (auto &k : x) {
270-
// TODO(fangjun): Fix it!
271-
k = AddBlank(k, 62);
272-
}
251+
for (auto &k : x) {
252+
k = AddBlank(k, meta_data.pad_id);
273253
}
274254

275255
if (config_.model.debug) {

sherpa-onnx/csrc/offline-tts-matcha-model-metadata.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@ namespace sherpa_onnx {
1515
// you have downloaded.
1616
struct OfflineTtsMatchaModelMetaData {
1717
int32_t sample_rate = 0;
18-
int32_t add_blank = 0;
1918
int32_t num_speakers = 0;
2019
int32_t version = 1;
20+
int32_t jieba = 0;
21+
int32_t espeak = 0;
22+
int32_t use_eos_bos = 0;
23+
int32_t pad_id = 0;
2124
};
2225

2326
} // namespace sherpa_onnx

sherpa-onnx/csrc/offline-tts-matcha-model.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,12 @@ class OfflineTtsMatchaModel::Impl {
139139

140140
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
141141
SHERPA_ONNX_READ_META_DATA(meta_data_.sample_rate, "sample_rate");
142-
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.add_blank, "add_blank",
143-
1);
144-
145142
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
146143
SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
144+
SHERPA_ONNX_READ_META_DATA(meta_data_.jieba, "jieba");
145+
SHERPA_ONNX_READ_META_DATA(meta_data_.espeak, "has_espeak");
146+
SHERPA_ONNX_READ_META_DATA(meta_data_.use_eos_bos, "use_eos_bos");
147+
SHERPA_ONNX_READ_META_DATA(meta_data_.pad_id, "pad_id");
147148
}
148149

149150
private:

sherpa-onnx/csrc/offline-tts-vits-model.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ class OfflineTtsVitsModel::Impl {
174174
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0);
175175
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0);
176176
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.use_eos_bos,
177-
"use_eos_bos", 0);
177+
"use_eos_bos", 1);
178178
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.pad_id, "pad_id", 0);
179179

180180
std::string comment;

0 commit comments

Comments
 (0)