Skip to content

Commit f8bae3e

Browse files
authored
Merge pull request #9 from neocl/dev
Release version 0.1a4
2 parents 8b30d90 + b3b4db8 commit f8bae3e

18 files changed

+159
-67
lines changed

MANIFEST.in

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
include README.rst
22
include CHANGES.md
33
include LICENSE
4+
include requirements*.txt
45
recursive-include jamdict/data/ *.sql
56
recursive-include jamdict/data/ *.json

README.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@ I have mirrored these files to Google Drive so you can download them there too:
4949
[https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk](https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk)
5050

5151
Official website
52-
- JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
53-
- kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
54-
- KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)
52+
53+
* JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
54+
* kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
55+
* KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)
5556

5657

5758
# Sample code

jamdict/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646

4747
########################################################################
4848

49+
from . import __version__ as version_info
4950
from .__version__ import __author__, __email__, __copyright__, __maintainer__
5051
from .__version__ import __credits__, __license__, __description__, __url__
5152
from .__version__ import __version_major__, __version_long__, __version__, __status__
@@ -54,4 +55,4 @@
5455
from .kanjidic2_sqlite import KanjiDic2SQLite
5556
from .util import Jamdict, JMDictXML, KanjiDic2XML
5657
__all__ = ['Jamdict', 'JMDictSQLite', 'JMDictXML', 'KanjiDic2SQLite', 'KanjiDic2XML',
57-
"__version__", "__author__", "__description__", "__copyright__"]
58+
"__version__", "__author__", "__description__", "__copyright__", "version_info"]

jamdict/__main__.py

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from . import tools
2+
tools.main()

jamdict/__version__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,6 @@
1010
__url__ = "https://github.com/neocl/jamdict"
1111
__maintainer__ = "Le Tuan Anh"
1212
__version_major__ = "0.1"
13-
__version__ = "{}a3".format(__version_major__)
13+
__version__ = "{}a4".format(__version_major__)
1414
__version_long__ = "{} - Alpha".format(__version_major__)
1515
__status__ = "Prototype"

jamdict/config.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
import logging
3636

3737
from chirptext import AppConfig
38-
from chirptext.io import read_file, write_file
38+
from chirptext.chio import read_file, write_file
3939

4040
# ----------------------------------------------------------------------
4141
# Configuration

jamdict/data/setup_jmdict.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* Add meta info */
22
CREATE TABLE IF NOT EXISTS meta (
3-
key TEXT UNIQUE,
3+
key TEXT PRIMARY KEY NOT NULL,
44
value TEXT NOT NULL
55
);
66

jamdict/jmdict.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
import logging
5151
from lxml import etree
5252

53-
from chirptext import io as chio
53+
from chirptext import chio
5454

5555
logger = logging.getLogger(__name__)
5656

@@ -82,9 +82,9 @@ def set_info(self, info):
8282
logging.warning("WARNING: multiple info tag")
8383
self.info = info
8484

85-
def text(self, compact=True, separator=' '):
85+
def text(self, compact=True, separator=' ', no_id=False):
8686
tmp = []
87-
if not compact:
87+
if not compact and not no_id:
8888
tmp.append('[id#%s]' % self.idseq)
8989
if self.kana_forms:
9090
tmp.append(self.kana_forms[0].text)

jamdict/jmdict_sqlite.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343

4444
from puchikarui import Schema
4545
from . import __version__ as JAMDICT_VERSION, __url__ as JAMDICT_URL
46-
from .jmdict import JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
46+
from .jmdict import Meta, JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
4747

4848

4949
# -------------------------------------------------------------------------------
@@ -85,7 +85,7 @@ def __init__(self, data_source=":memory:", setup_script=None, setup_file=None, *
8585
self.add_script(SETUP_SCRIPT)
8686
self.add_file(JMDICT_SETUP_FILE)
8787
# Meta
88-
self.add_table('meta', ['jmdict_version', 'jmdict_url', 'generator', 'generator_version', 'generator_url'])
88+
self.add_table('meta', ['key', 'value'], proto=Meta).set_id('key')
8989
self.add_table('Entry', ['idseq'])
9090
self.add_table('Link', ['ID', 'idseq', 'tag', 'desc', 'uri'])
9191
self.add_table('Bib', ['ID', 'idseq', 'tag', 'text'])
@@ -146,8 +146,8 @@ def search(self, query, ctx=None):
146146
if ctx is None:
147147
with self.ctx() as ctx:
148148
return self.search(query, ctx=ctx)
149-
where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?)"
150-
params = [query, query]
149+
where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?) OR idseq IN (SELECT idseq FROM sense JOIN sensegloss ON sense.ID == sensegloss.sid WHERE text like ?)"
150+
params = [query, query, query]
151151
try:
152152
if query.startswith('id#'):
153153
query_int = int(query[3:])

jamdict/kanjidic2.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
import logging
5252
from lxml import etree
5353

54-
from chirptext import io as chio
54+
from chirptext import chio
5555

5656

5757
# ------------------------------------------------------------------------------
@@ -366,7 +366,7 @@ def __init__(self, qc_type='', value='', skip_misclass=""):
366366
- stroke_count - a mistake in the number of strokes
367367
- stroke_and_posn - mistakes in both division and strokes
368368
- stroke_diff - ambiguous stroke counts depending on glyph
369-
--> """
369+
--> """
370370
self.cid = None
371371
self.qc_type = qc_type
372372
self.value = value

jamdict/tools.py

+38-24
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,13 @@
3939

4040
import os
4141

42+
4243
from chirptext import confirm, TextReport, Timer
4344
from chirptext.cli import CLIApp, setup_logging
4445

4546
from jamdict import Jamdict
4647
from jamdict import config
48+
from jamdict import version_info
4749

4850
# -------------------------------------------------------------------------------
4951
# Configuration
@@ -53,7 +55,11 @@
5355
JMD_XML = config.get_file('JMDICT_XML')
5456
KD2_XML = config.get_file('KD2_XML')
5557
JMD_DB = config.get_file('JAMDICT_DB')
56-
setup_logging('logging.json', 'logs')
58+
59+
if os.path.isfile('logging.json'):
60+
setup_logging('logging.json', 'logs')
61+
else:
62+
setup_logging(os.path.join(config.home_dir(), 'logging.json'), 'logs')
5763

5864

5965
# -------------------------------------------------------------------------------
@@ -99,39 +105,41 @@ def import_data(cli, args):
99105
print("Database paths were not provided. Process aborted.")
100106

101107

102-
def dump_result(results):
108+
def dump_result(results, report=None):
109+
if report is None:
110+
report = TextReport()
103111
if results.entries:
104-
print("=" * 40)
105-
print("Found entries")
106-
print("=" * 40)
112+
report.print("=" * 40)
113+
report.print("Found entries")
114+
report.print("=" * 40)
107115
for e in results.entries:
108116
kj = ', '.join([k.text for k in e.kanji_forms])
109117
kn = ', '.join([k.text for k in e.kana_forms])
110-
print("Entry: {} | Kj: {} | Kn: {}".format(e.idseq, kj, kn))
111-
print("-" * 20)
118+
report.print("Entry: {} | Kj: {} | Kn: {}".format(e.idseq, kj, kn))
119+
report.print("-" * 20)
112120
for idx, s in enumerate(e.senses):
113-
print("{idx}. {s}".format(idx=idx + 1, s=s))
114-
print('')
121+
report.print("{idx}. {s}".format(idx=idx + 1, s=s))
122+
report.print('')
115123
else:
116-
print("No dictionary entry was found.")
124+
report.print("No dictionary entry was found.")
117125
if results.chars:
118-
print("=" * 40)
119-
print("Found characters")
120-
print("=" * 40)
126+
report.print("=" * 40)
127+
report.print("Found characters")
128+
report.print("=" * 40)
121129
for c in results.chars:
122-
print("Char: {} | Strokes: {}".format(c, c.stroke_count))
123-
print("-" * 20)
130+
report.print("Char: {} | Strokes: {}".format(c, c.stroke_count))
131+
report.print("-" * 20)
124132
for rmg in c.rm_groups:
125-
print("Readings:", ", ".join([r.value for r in rmg.readings]))
126-
print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
133+
report.print("Readings:", ", ".join([r.value for r in rmg.readings]))
134+
report.print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
127135
else:
128-
print("No character was found.")
136+
report.print("No character was found.")
129137

130138

131139
def lookup(cli, args):
132140
'''Look up words by kanji/kana'''
133141
jam = get_jam(cli, args)
134-
results = jam.lookup(args.query)
142+
results = jam.lookup(args.query, strict_lookup=args.strict)
135143
if args.format == 'json':
136144
print(results.to_json())
137145
else:
@@ -148,11 +156,15 @@ def file_status(file_path):
148156

149157
def show_info(cli, args):
150158
''' Show jamdict configuration (data folder, configuration file location, etc.) '''
151-
print("Configuration location: {}".format(config._get_config_manager().locate_config()))
152-
print("-" * 40)
153-
print("Jamdict DB location : {} - {}".format(args.jdb, file_status(args.jdb)))
154-
print("JMDict XML file : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
155-
print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))
159+
output = TextReport(args.output) if 'output' in args else TextReport()
160+
output.header("Jamdict | {} - Version: {}".format(version_info.__description__, version_info.__version__), level='h0')
161+
output.header("Basic configuration")
162+
output.print("JAMDICT_HOME: {}".format(config.home_dir()))
163+
output.print("Configuration location: {}".format(config._get_config_manager().locate_config()))
164+
output.header("Data files")
165+
output.print("Jamdict DB location: {} - {}".format(args.jdb, file_status(args.jdb)))
166+
output.print("JMDict XML file : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
167+
output.print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))
156168

157169

158170
# -------------------------------------------------------------------------------
@@ -178,13 +190,15 @@ def main():
178190

179191
# show info
180192
info_task = app.add_task('info', func=show_info)
193+
info_task.add_argument('-o', '--output', help='Write information to a text file')
181194
add_data_config(info_task)
182195

183196
# look up task
184197
lookup_task = app.add_task('lookup', func=lookup)
185198
lookup_task.add_argument('query', help='kanji/kana')
186199
lookup_task.add_argument('-f', '--format', help='json or text')
187200
lookup_task.add_argument('--compact', action='store_true')
201+
lookup_task.add_argument('-s', '--strict', action='store_true')
188202
lookup_task.set_defaults(func=lookup)
189203
add_data_config(lookup_task)
190204

jamdict/util.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
import logging
5252
import threading
5353
from collections import defaultdict as dd
54+
from collections import OrderedDict
55+
from chirptext.deko import HIRAGANA, KATAKANA
5456

5557
from . import config
5658
from .jmdict import JMDictXMLParser
@@ -73,13 +75,13 @@ def __init__(self, entries, chars):
7375
self.entries = entries if entries else []
7476
self.chars = chars if chars else []
7577

76-
def text(self, compact=True, entry_sep='。', separator=' | '):
78+
def text(self, compact=True, entry_sep='。', separator=' | ', no_id=False, with_chars=True):
7779
output = []
7880
if self.entries:
79-
entries_txt = str(entry_sep.join(e.text(compact=compact, separator='') for e in self.entries))
81+
entries_txt = str(entry_sep.join(e.text(compact=compact, separator='', no_id=no_id) for e in self.entries))
8082
output.append("Entries: ")
8183
output.append(entries_txt)
82-
if self.entries:
84+
if self.chars and with_chars:
8385
if compact:
8486
chars_txt = ', '.join(str(c) for c in self.chars)
8587
else:
@@ -190,9 +192,9 @@ def import_data(self):
190192
getLogger().info("Importing KanjiDic2 data")
191193
self.kd2.insert_chars(self.kd2_xml)
192194

193-
def get_char(self, literal):
195+
def get_char(self, literal, ctx=None):
194196
if self.kd2 is not None:
195-
return self.kd2.get_char(literal)
197+
return self.kd2.get_char(literal, ctx=ctx)
196198
elif self.kd2_xml:
197199
return self.kd2_xml.lookup(literal)
198200
else:
@@ -206,7 +208,7 @@ def get_entry(self, idseq):
206208
else:
207209
raise LookupError("There is no backend data available")
208210

209-
def lookup(self, query):
211+
def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None):
210212
if not self.is_available():
211213
raise LookupError("There is no backend data available")
212214
elif not query:
@@ -215,18 +217,21 @@ def lookup(self, query):
215217
entries = []
216218
chars = []
217219
if self.jmdict is not None:
218-
entries = self.jmdict.search(query)
220+
entries = self.jmdict.search(query, ctx=ctx)
219221
elif self.jmdict_xml:
220222
entries = self.jmdict_xml.lookup(query)
221-
if self.has_kd2():
223+
if lookup_chars and self.has_kd2():
222224
# look up each character in the query and in the kanji forms of each found entry
223-
chars_to_search = set(query)
224-
if entries:
225+
chars_to_search = OrderedDict({c: c for c in query})
226+
if not strict_lookup and entries:
227+
# auto add characters from entries
225228
for e in entries:
226229
for k in e.kanji_forms:
227-
chars_to_search.update(k.text)
230+
for c in k.text:
231+
if c not in HIRAGANA and c not in KATAKANA:
232+
chars_to_search[c] = c
228233
for c in chars_to_search:
229-
result = self.get_char(c)
234+
result = self.get_char(c, ctx=ctx)
230235
if result is not None:
231236
chars.append(result)
232237
return LookupResult(entries, chars)

jamdol-flask.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -52,22 +52,22 @@
5252
from flask import Flask, Response
5353
from functools import wraps
5454
from flask import request
55+
56+
from chirptext.cli import setup_logging
57+
5558
from jamdict import Jamdict
5659

5760
# ---------------------------------------------------------------------
5861
# CONFIGURATION
5962
# ---------------------------------------------------------------------
6063

64+
setup_logging('logging.json', 'logs')
6165
app = Flask(__name__, static_url_path="")
62-
# Prefer to use jmdict.en
63-
DB_FILE = os.path.abspath('./data/jamdict.en.db')
64-
if not os.path.isfile(DB_FILE):
65-
DB_FILE = os.path.abspath('./data/jamdict.db')
66-
jmd = Jamdict(db_file=DB_FILE)
66+
jmd = Jamdict()
6767

6868

69-
def get_logger():
70-
logging.getLogger(__name__)
69+
def getLogger():
70+
return logging.getLogger(__name__)
7171

7272

7373
# ---------------------------------------------------------------------
@@ -100,9 +100,11 @@ def get_entry(idseq):
100100

101101

102102
@app.route('/jamdol/search/<query>', methods=['GET'])
103+
@app.route('/jamdol/search/<strict>/<query>', methods=['GET'])
103104
@jsonp
104-
def search(query):
105-
results = jmd.lookup(query)
105+
def search(query, strict=None):
106+
getLogger().info("Query = {}".format(query))
107+
results = jmd.lookup(query, strict_lookup=strict)
106108
return results.to_json()
107109

108110

0 commit comments

Comments
 (0)