Skip to content

Commit f2d2015

Browse files
committed
Yoroshiku v0.0.1
1 parent 99176e1 commit f2d2015

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+4528
-0
lines changed

examples/yoroshiku.js

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/**
2+
* Japanese Romaji Transliterator
3+
* @author Loreto Parisi (loretoparisi at gmail dot com)
4+
* @2019 Loreto Parisi
5+
*/
6+
7+
const Yoroshiku = require('../lib/index');
8+
9+
// Create the transliterator instance used by the whole example.
const yoroshiku = new Yoroshiku();

// Conversion settings shared by every call below: spaced Hepburn romaji.
const conversion = { to: "romaji", mode: "spaced", romajiSystem: "hepburn" };

// Transliterate one piece of text with the shared settings.
const toRomaji = (text) => yoroshiku.transliterate(text, conversion);

// Print any result as a single JSON line.
const printJson = (value) => {
    console.log(JSON.stringify(value));
};

// Lines that are transliterated concurrently in the second step.
const docs = [
    "繫がっているから",
    "理想と现実の満员电车",
    "生まれちゃうっ!(*ノωノ)",
    "ワンダー!ナンダー!ヤッター!"
];

// Load the module, transliterate a single string, then transliterate the
// whole batch in parallel; any failure along the way is reported on stderr.
yoroshiku.load()
    .then(() => toRomaji("僕らは完全無欠じ~ゃ無い"))
    .then((single) => {
        printJson(single);
        return Promise.all(docs.map((doc) => toRomaji(doc)));
    })
    .then((batch) => {
        printJson(batch);
    })
    .catch((err) => console.error(err));

lib/index.js

+196
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
/**
2+
* Yoroshiku
3+
* @author Loreto Parisi (loretoparisi at gmail dot com)
4+
* @2019 Loreto Parisi
5+
*/
6+
7+
(function () {
8+
9+
const KuroshiroLib = require('./kuroshiro/core');
10+
const KuromojiAnalyzerLib = require('./kuromoji-analyzer/index');
11+
12+
/**
 * Yoroshiku
 * Converts Japanese text to Hiragana, Katakana or Romaji; furigana and
 * okurigana output modes are supported.
 *
 * Thin wrapper around a kuroshiro instance (`KuroshiroLib`) which load()
 * creates and initializes with a kuromoji analyzer (`KuromojiAnalyzerLib`).
 * Every method except the constructor and unload() reads `this.kuroshiro`,
 * which is only assigned by load(), so load() must be called first.
 */
class Yoroshiku {
    /**
     * @param {Object} [options] Instance options; every enumerable property
     *   is copied onto `this._options`.
     * @param {string} [options.dictPath] Dictionary location forwarded to the
     *   analyzer by load(). When omitted the analyzer falls back to its own
     *   default location.
     */
    constructor(options) {
        // No default options yet; start empty and copy the caller's values.
        this._options = {};
        for (const attr in options) {
            this._options[attr] = options[attr];
        }
    }

    /**
     * Unload the module. Currently a placeholder (nothing is released yet).
     * @returns {Promise<boolean>} Promise resolved with `true`.
     */
    unload() {
        // @TODO release the resources created by load()
        return Promise.resolve(true);
    }

    /**
     * Create the kuroshiro instance and initialize it with a kuromoji
     * analyzer. `this.kuroshiro` is assigned synchronously, before the
     * returned promise settles.
     * @returns {Promise<boolean>} Promise resolved with `true` once the
     *   initialization succeeded, rejected with the initialization error
     *   otherwise.
     */
    load() {
        return new Promise((resolve, reject) => {
            this.kuroshiro = new KuroshiroLib();
            // dictPath may be undefined, in which case the analyzer uses its
            // default dictionary location.
            const analyzer = new KuromojiAnalyzerLib({ dictPath: this._options.dictPath });
            this.kuroshiro.init(analyzer)
                .then(() => resolve(true))
                .catch(reject);
        });
    }

    /**
     * Transliterate
     * @param text {String} Text
     * @param params {Object} Options, merged over the defaults listed here:
     *  to                  Target syllabary [hiragana, katakana, romaji] - "romaji"
     *  mode                Convert mode [normal, spaced, okurigana, furigana] - "spaced"
     *  romajiSystem        Romanization system [nippon, passport, hepburn] - "hepburn"
     *      nippon - ISO-3602 http://www.age.ne.jp/x/nrs/iso3602/iso3602.html
     *      hepburn - BS 4812 : 1972 https://archive.is/PiJ4
     *      passport - https://www.ezairyu.mofa.go.jp/passport/hebon.html
     *      comparison - http://jgrammar.life.coocan.jp/ja/data/rohmaji2.htm
     *  delimiter_start     Delimiter(Start) - "("
     *  delimiter_end       Delimiter(End) - ")"
     * @returns {Promise<String>} Promise resolved with the converted text after
     *  whitespace clean-up, rejected with any conversion error.
     */
    transliterate(text, params = {}) {
        const options = {
            mode: "spaced",
            to: "romaji",
            romajiSystem: "hepburn",
            delimiter_start: "(",
            delimiter_end: ")"
        };
        for (const attr in params) {
            options[attr] = params[attr];
        }
        return new Promise((resolve, reject) => {
            this.kuroshiro.convert(text, options)
                .then((res) => {
                    const cleaned = res
                        // drop the whitespace character on each side of a run of newlines
                        .replace(/\s(\n+)\s/g, '$1')
                        // drop the whitespace character in front of punctuation
                        .replace(/\s([\?:;,.^\.\'\-\/\+\<\>,&])/g, '$1')
                        // collapse runs of spaces, tabs and carriage returns
                        .replace(/[ \t\r]+/g, ' ');
                    resolve(cleaned);
                })
                .catch(reject);
        });
    }

    /**
     * Check if input char is hiragana.
     */
    isHiragana(char) {
        return this.kuroshiro.isHiragana(char);
    }

    /**
     * Check if input char is katakana.
     */
    isKatakana(char) {
        // Previously delegated to isHiragana by mistake.
        return this.kuroshiro.isKatakana(char);
    }

    /**
     * Check if input char is kana.
     */
    isKana(char) {
        return this.kuroshiro.isKana(char);
    }

    /**
     * Check if input char is kanji.
     */
    isKanji(char) {
        return this.kuroshiro.isKanji(char);
    }

    /**
     * Check if input char is Japanese.
     */
    isJapanese(char) {
        return this.kuroshiro.isJapanese(char);
    }

    /**
     * Check if input string has hiragana.
     */
    hasHiragana(char) {
        return this.kuroshiro.hasHiragana(char);
    }

    /**
     * Check if input string has katakana.
     */
    hasKatakana(char) {
        return this.kuroshiro.hasKatakana(char);
    }

    /**
     * Check if input string has kana.
     */
    hasKana(char) {
        return this.kuroshiro.hasKana(char);
    }

    /**
     * Check if input string has kanji.
     */
    hasKanji(char) {
        return this.kuroshiro.hasKanji(char);
    }

    /**
     * Check if input string has Japanese.
     */
    hasJapanese(char) {
        return this.kuroshiro.hasJapanese(char);
    }

    /**
     * Convert input kana string to hiragana.
     * The method name keeps its existing spelling: it is the public name and
     * also the name of the method called on the kuroshiro instance.
     */
    kanaToHiragna(char) {
        return this.kuroshiro.kanaToHiragna(char);
    }

    /**
     * Convert input kana string to katakana.
     */
    kanaToKatakana(char) {
        return this.kuroshiro.kanaToKatakana(char);
    }

    /**
     * Convert input kana string to romaji.
     * @param char {String} Kana string
     * @param system {String} Optional romanization system, accepts "nippon",
     *  "passport", "hepburn" (Default: "hepburn"). Forwarded unchanged, so
     *  omitting it leaves the choice of default to the kuroshiro instance.
     */
    kanaToRomaji(char, system) {
        return this.kuroshiro.kanaToRomaji(char, system);
    }
}
193+
194+
module.exports = Yoroshiku;
195+
196+
}).call(this);
3.77 MB
Binary file not shown.

lib/kuromoji-analyzer/dict/cc.dat.gz

1.61 MB
Binary file not shown.
2.97 MB
Binary file not shown.

lib/kuromoji-analyzer/dict/tid.dat.gz

1.53 MB
Binary file not shown.
1.42 MB
Binary file not shown.
5.64 MB
Binary file not shown.

lib/kuromoji-analyzer/dict/unk.dat.gz

10.3 KB
Binary file not shown.
306 Bytes
Binary file not shown.
338 Bytes
Binary file not shown.
1.11 KB
Binary file not shown.
1.16 KB
Binary file not shown.
10.3 KB
Binary file not shown.

lib/kuromoji-analyzer/index.js

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
const kuromoji = require('./kuromoji/kuromoji');
2+
const path = require('path');
3+
/**
 * Morphological analyzer for kuroshiro, backed by kuromoji.
 */
class Analyzer {
    /**
     * Remember where the dictionary files live; no tokenizer is built yet.
     * @param {Object} [options] Settings as key-value pairs
     * @param {string} [options.dictPath] Directory holding the dictionary
     *   files; a falsy value selects the "dict" directory next to this module
     */
    constructor({ dictPath } = {}) {
        this._analyzer = null;
        this._dictPath = dictPath || path.join(__dirname, "dict");
    }

    /**
     * Build the kuromoji tokenizer from the configured dictionary path.
     * @returns {Promise} Resolved (with no value) once the tokenizer is
     *   stored on the instance; rejected with the build error, or with an
     *   Error when a tokenizer is already present
     */
    init() {
        return new Promise((resolve, reject) => {
            if (this._analyzer != null) {
                reject(new Error("This analyzer has already been initialized."));
                return;
            }

            const onBuilt = (buildError, tokenizer) => {
                if (buildError) {
                    return reject(buildError);
                }
                this._analyzer = tokenizer;
                resolve();
            };
            kuromoji.builder({ dicPath: this._dictPath }).build(onBuilt);
        });
    }

    /**
     * Tokenize a string. For each token the `word_id`, `word_type` and
     * `word_position` properties are moved into a nested `verbose` object.
     * @param {string} str input string
     * @returns {Promise} Resolved with the token array; a blank or
     *   whitespace-only input resolves with an empty array
     * @example The result of parsing
     * [{
     *     "surface_form": "黒白",    // surface form
     *     "pos": "名詞",               // part of speech
     *     "pos_detail_1": "一般",      // part-of-speech subcategory 1
     *     "pos_detail_2": "*",        // part-of-speech subcategory 2
     *     "pos_detail_3": "*",        // part-of-speech subcategory 3
     *     "conjugated_type": "*",     // conjugation type
     *     "conjugated_form": "*",     // conjugation form
     *     "basic_form": "黒白",      // base form
     *     "reading": "クロシロ",       // reading
     *     "pronunciation": "クロシロ",  // pronunciation
     *     "verbose": {                 // Other properties
     *         "word_id": 413560,
     *         "word_type": "KNOWN",
     *         "word_position": 1
     *     }
     * }]
     */
    parse(str = "") {
        // Relocate the bookkeeping fields of one token under `verbose`.
        const tuckAwayDetails = (token) => {
            const { word_id, word_type, word_position } = token;
            token.verbose = { word_id, word_type, word_position };
            delete token.word_id;
            delete token.word_type;
            delete token.word_position;
        };

        return new Promise((resolve) => {
            if (str.trim() === "") {
                resolve([]);
                return;
            }

            const tokens = this._analyzer.tokenize(str);
            for (let index = 0; index < tokens.length; index += 1) {
                tuckAwayDetails(tokens[index]);
            }
            resolve(tokens);
        });
    }
}
84+
85+
module.exports = Analyzer;

0 commit comments

Comments
 (0)