@@ -83,7 +83,6 @@ class JiebaLexicon::Impl {
83
83
std::vector<TokenIDs> ans;
84
84
std::vector<int64_t > this_sentence;
85
85
86
- int32_t blank = token2id_.at (" " );
87
86
for (const auto &w : words) {
88
87
auto ids = ConvertWordToIds (w);
89
88
if (ids.empty ()) {
@@ -92,7 +91,6 @@ class JiebaLexicon::Impl {
92
91
}
93
92
94
93
this_sentence.insert (this_sentence.end (), ids.begin (), ids.end ());
95
- // this_sentence.push_back(blank);
96
94
97
95
if (w == " 。" || w == " !" || w == " ?" || w == " ," ) {
98
96
ans.emplace_back (std::move (this_sentence));
@@ -134,7 +132,9 @@ class JiebaLexicon::Impl {
134
132
token2id_ = ReadTokens (is);
135
133
136
134
std::vector<std::pair<std::string, std::string>> puncts = {
137
- {" ," , " ," }, {" ." , " 。" }, {" !" , " !" }, {" ?" , " ?" }};
135
+ {" ," , " ," }, {" ." , " 。" }, {" !" , " !" }, {" ?" , " ?" }, {" :" , " :" },
136
+ {" \" " , " “" }, {" \" " , " ”" }, {" '" , " ‘" }, {" '" , " ’" },
137
+ };
138
138
139
139
for (const auto &p : puncts) {
140
140
if (token2id_.count (p.first ) && !token2id_.count (p.second )) {
0 commit comments