Merge pull request #9 from neocl/dev

letuananh · web-flow · commit f8bae3e15d00 · 2018-07-19T17:45:55.000+08:00
Release version 0.1a4
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1,5 +1,6 @@
 include README.rst
 include CHANGES.md
 include LICENSE
+include requirements*.txt
 recursive-include jamdict/data/ *.sql
 recursive-include jamdict/data/ *.json
diff --git a/README.md b/README.md
@@ -49,9 +49,10 @@ I have mirrored these files to Google Drive so you can download there too:
 [https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk](https://drive.google.com/drive/folders/1z4zF9ImZlNeTZZplflvvnpZfJp3WVLPk)
 
 Official website
-- JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
-- kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
-- KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)
+
+* JMdict: [http://edrdg.org/jmdict/edict_doc.html](http://edrdg.org/jmdict/edict_doc.html)
+* kanjidic2: [http://www.edrdg.org/kanjidic/kanjd2index.html](http://www.edrdg.org/kanjidic/kanjd2index.html)
+* KRADFILE: [http://www.edrdg.org/krad/kradinf.html](http://www.edrdg.org/krad/kradinf.html)
 
 
 # Sample codes
diff --git a/jamdict/__init__.py b/jamdict/__init__.py
@@ -46,6 +46,7 @@
 
 ########################################################################
 
+from . import __version__ as version_info
 from .__version__ import __author__, __email__, __copyright__, __maintainer__
 from .__version__ import __credits__, __license__, __description__, __url__
 from .__version__ import __version_major__, __version_long__, __version__, __status__
@@ -54,4 +55,4 @@
 from .kanjidic2_sqlite import KanjiDic2SQLite
 from .util import Jamdict, JMDictXML, KanjiDic2XML
 __all__ = ['Jamdict', 'JMDictSQLite', 'JMDictXML', 'KanjiDic2SQLite', 'KanjiDic2XML',
-           "__version__", "__author__", "__description__", "__copyright__"]
+           "__version__", "__author__", "__description__", "__copyright__", "version_info"]
diff --git a/jamdict/__main__.py b/jamdict/__main__.py
@@ -0,0 +1,2 @@
+from . import tools
+tools.main()
diff --git a/jamdict/__version__.py b/jamdict/__version__.py
@@ -10,6 +10,6 @@
 __url__ = "https://github.com/neocl/jamdict"
 __maintainer__ = "Le Tuan Anh"
 __version_major__ = "0.1"
-__version__ = "{}a3".format(__version_major__)
+__version__ = "{}a4".format(__version_major__)
 __version_long__ = "{} - Alpha".format(__version_major__)
 __status__ = "Prototype"
diff --git a/jamdict/config.py b/jamdict/config.py
@@ -35,7 +35,7 @@
 import logging
 
 from chirptext import AppConfig
-from chirptext.io import read_file, write_file
+from chirptext.chio import read_file, write_file
 
 # ----------------------------------------------------------------------
 # Configuration
diff --git a/jamdict/data/setup_jmdict.sql b/jamdict/data/setup_jmdict.sql
@@ -1,6 +1,6 @@
 /* Add meta info */
 CREATE TABLE IF NOT EXISTS meta (
-       key TEXT UNIQUE,
+       key TEXT PRIMARY KEY NOT NULL,
        value TEXT NOT NULL
 );
 
diff --git a/jamdict/jmdict.py b/jamdict/jmdict.py
@@ -50,7 +50,7 @@
 import logging
 from lxml import etree
 
-from chirptext import io as chio
+from chirptext import chio
 
 logger = logging.getLogger(__name__)
 
@@ -82,9 +82,9 @@ def set_info(self, info):
             logging.warning("WARNING: multiple info tag")
         self.info = info
 
-    def text(self, compact=True, separator=' '):
+    def text(self, compact=True, separator=' ', no_id=False):
         tmp = []
-        if not compact:
+        if not compact and not no_id:
             tmp.append('[id#%s]' % self.idseq)
         if self.kana_forms:
             tmp.append(self.kana_forms[0].text)
diff --git a/jamdict/jmdict_sqlite.py b/jamdict/jmdict_sqlite.py
@@ -43,7 +43,7 @@
 
 from puchikarui import Schema
 from . import __version__ as JAMDICT_VERSION, __url__ as JAMDICT_URL
-from .jmdict import JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
+from .jmdict import Meta, JMDEntry, EntryInfo, Link, BibInfo, Audit, KanjiForm, KanaForm, Sense, SenseGloss, LSource
 
 
 # -------------------------------------------------------------------------------
@@ -85,7 +85,7 @@ def __init__(self, data_source=":memory:", setup_script=None, setup_file=None, *
         self.add_script(SETUP_SCRIPT)
         self.add_file(JMDICT_SETUP_FILE)
         # Meta
-        self.add_table('meta', ['jmdict_version', 'jmdict_url', 'generator', 'generator_version', 'generator_url'])
+        self.add_table('meta', ['key', 'value'], proto=Meta).set_id('key')
         self.add_table('Entry', ['idseq'])
         self.add_table('Link', ['ID', 'idseq', 'tag', 'desc', 'uri'])
         self.add_table('Bib', ['ID', 'idseq', 'tag', 'text'])
@@ -146,8 +146,8 @@ def search(self, query, ctx=None):
         if ctx is None:
             with self.ctx() as ctx:
                 return self.search(query, ctx=ctx)
-        where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?)"
-        params = [query, query]
+        where = "idseq IN (SELECT idseq FROM Kanji WHERE text like ?) OR idseq IN (SELECT idseq FROM Kana WHERE text like ?) OR idseq IN (SELECT idseq FROM sense JOIN sensegloss ON sense.ID == sensegloss.sid WHERE text like ?)"
+        params = [query, query, query]
         try:
             if query.startswith('id#'):
                 query_int = int(query[3:])
diff --git a/jamdict/kanjidic2.py b/jamdict/kanjidic2.py
@@ -51,7 +51,7 @@
 import logging
 from lxml import etree
 
-from chirptext import io as chio
+from chirptext import chio
 
 
 # ------------------------------------------------------------------------------
@@ -366,7 +366,7 @@ def __init__(self, qc_type='', value='', skip_misclass=""):
     - stroke_count - a mistake in the number of strokes
     - stroke_and_posn - mistakes in both division and strokes
     - stroke_diff - ambiguous stroke counts depending on glyph
-    --> """
+    --> """
         self.cid = None
         self.qc_type = qc_type
         self.value = value
diff --git a/jamdict/tools.py b/jamdict/tools.py
@@ -39,11 +39,13 @@
 
 import os
 
+
 from chirptext import confirm, TextReport, Timer
 from chirptext.cli import CLIApp, setup_logging
 
 from jamdict import Jamdict
 from jamdict import config
+from jamdict import version_info
 
 # -------------------------------------------------------------------------------
 # Configuration
@@ -53,7 +55,11 @@
 JMD_XML = config.get_file('JMDICT_XML')
 KD2_XML = config.get_file('KD2_XML')
 JMD_DB = config.get_file('JAMDICT_DB')
-setup_logging('logging.json', 'logs')
+
+if os.path.isfile('logging.json'):
+    setup_logging('logging.json', 'logs')
+else:
+    setup_logging(os.path.join(config.home_dir(), 'logging.json'), 'logs')
 
 
 # -------------------------------------------------------------------------------
@@ -99,39 +105,41 @@ def import_data(cli, args):
         print("Database paths were not provided. Process aborted.")
 
 
-def dump_result(results):
+def dump_result(results, report=None):
+    if report is None:
+        report = TextReport()
     if results.entries:
-        print("=" * 40)
-        print("Found entries")
-        print("=" * 40)
+        report.print("=" * 40)
+        report.print("Found entries")
+        report.print("=" * 40)
         for e in results.entries:
             kj = ', '.join([k.text for k in e.kanji_forms])
             kn = ', '.join([k.text for k in e.kana_forms])
-            print("Entry: {} | Kj:  {} | Kn: {}".format(e.idseq, kj, kn))
-            print("-" * 20)
+            report.print("Entry: {} | Kj:  {} | Kn: {}".format(e.idseq, kj, kn))
+            report.print("-" * 20)
             for idx, s in enumerate(e.senses):
-                print("{idx}. {s}".format(idx=idx + 1, s=s))
-            print('')
+                report.print("{idx}. {s}".format(idx=idx + 1, s=s))
+            report.print('')
     else:
-        print("No dictionary entry was found.")
+        report.print("No dictionary entry was found.")
     if results.chars:
-        print("=" * 40)
-        print("Found characters")
-        print("=" * 40)
+        report.print("=" * 40)
+        report.print("Found characters")
+        report.print("=" * 40)
         for c in results.chars:
-            print("Char: {} | Strokes: {}".format(c, c.stroke_count))
-            print("-" * 20)
+            report.print("Char: {} | Strokes: {}".format(c, c.stroke_count))
+            report.print("-" * 20)
             for rmg in c.rm_groups:
-                print("Readings:", ", ".join([r.value for r in rmg.readings]))
-                print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
+                report.print("Readings:", ", ".join([r.value for r in rmg.readings]))
+                report.print("Meanings:", ", ".join([m.value for m in rmg.meanings if not m.m_lang or m.m_lang == 'en']))
     else:
-        print("No character was found.")
+        report.print("No character was found.")
 
 
 def lookup(cli, args):
     '''Lookup words by kanji/kana'''
     jam = get_jam(cli, args)
-    results = jam.lookup(args.query)
+    results = jam.lookup(args.query, strict_lookup=args.strict)
     if args.format == 'json':
         print(results.to_json())
     else:
@@ -148,11 +156,15 @@ def file_status(file_path):
 
 def show_info(cli, args):
     ''' Show jamdict configuration (data folder, configuration file location, etc.) '''
-    print("Configuration location: {}".format(config._get_config_manager().locate_config()))
-    print("-" * 40)
-    print("Jamdict DB location   : {} - {}".format(args.jdb, file_status(args.jdb)))
-    print("JMDict XML file       : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
-    print("KanjiDic2 XML file    : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))
+    output = TextReport(args.output) if 'output' in args else TextReport()
+    output.header("Jamdict | {} - Version: {}".format(version_info.__description__, version_info.__version__), level='h0')
+    output.header("Basic configuration")
+    output.print("JAMDICT_HOME:           {}".format(config.home_dir()))
+    output.print("Configuration location: {}".format(config._get_config_manager().locate_config()))
+    output.header("Data files")
+    output.print("Jamdict DB location: {} - {}".format(args.jdb, file_status(args.jdb)))
+    output.print("JMDict XML file    : {} - {}".format(args.jmdxml, file_status(args.jmdxml)))
+    output.print("KanjiDic2 XML file : {} - {}".format(args.kd2xml, file_status(args.kd2xml)))
 
 
 # -------------------------------------------------------------------------------
@@ -178,13 +190,15 @@ def main():
 
     # show info
     info_task = app.add_task('info', func=show_info)
+    info_task.add_argument('-o', '--output', help='Write information to a text file')
     add_data_config(info_task)
 
     # look up task
     lookup_task = app.add_task('lookup', func=lookup)
     lookup_task.add_argument('query', help='kanji/kana')
     lookup_task.add_argument('-f', '--format', help='json or text')
     lookup_task.add_argument('--compact', action='store_true')
+    lookup_task.add_argument('-s', '--strict', action='store_true')
     lookup_task.set_defaults(func=lookup)
     add_data_config(lookup_task)
 
diff --git a/jamdict/util.py b/jamdict/util.py
@@ -51,6 +51,8 @@
 import logging
 import threading
 from collections import defaultdict as dd
+from collections import OrderedDict
+from chirptext.deko import HIRAGANA, KATAKANA
 
 from . import config
 from .jmdict import JMDictXMLParser
@@ -73,13 +75,13 @@ def __init__(self, entries, chars):
         self.entries = entries if entries else []
         self.chars = chars if chars else []
 
-    def text(self, compact=True, entry_sep='。', separator=' | '):
+    def text(self, compact=True, entry_sep='。', separator=' | ', no_id=False, with_chars=True):
         output = []
         if self.entries:
-            entries_txt = str(entry_sep.join(e.text(compact=compact, separator='') for e in self.entries))
+            entries_txt = str(entry_sep.join(e.text(compact=compact, separator='', no_id=no_id) for e in self.entries))
             output.append("Entries: ")
             output.append(entries_txt)
-        if self.entries:
+        if self.chars and with_chars:
             if compact:
                 chars_txt = ', '.join(str(c) for c in self.chars)
             else:
@@ -190,9 +192,9 @@ def import_data(self):
             getLogger().info("Importing KanjiDic2 data")
             self.kd2.insert_chars(self.kd2_xml)
 
-    def get_char(self, literal):
+    def get_char(self, literal, ctx=None):
         if self.kd2 is not None:
-            return self.kd2.get_char(literal)
+            return self.kd2.get_char(literal, ctx=ctx)
         elif self.kd2_xml:
             return self.kd2_xml.lookup(literal)
         else:
@@ -206,7 +208,7 @@ def get_entry(self, idseq):
         else:
             raise LookupError("There is no backend data available")
 
-    def lookup(self, query):
+    def lookup(self, query, strict_lookup=False, lookup_chars=True, ctx=None):
         if not self.is_available():
             raise LookupError("There is no backend data available")
         elif not query:
@@ -215,18 +217,21 @@ def lookup(self, query):
         entries = []
         chars = []
         if self.jmdict is not None:
-            entries = self.jmdict.search(query)
+            entries = self.jmdict.search(query, ctx=ctx)
         elif self.jmdict_xml:
             entries = self.jmdict_xml.lookup(query)
-        if self.has_kd2():
+        if lookup_chars and self.has_kd2():
             # lookup each character in query and kanji readings of each found entries
-            chars_to_search = set(query)
-            if entries:
+            chars_to_search = OrderedDict({c: c for c in query})
+            if not strict_lookup and entries:
+                # auto add characters from entries
                 for e in entries:
                     for k in e.kanji_forms:
-                        chars_to_search.update(k.text)
+                        for c in k.text:
+                            if c not in HIRAGANA and c not in KATAKANA:
+                                chars_to_search[c] = c
             for c in chars_to_search:
-                result = self.get_char(c)
+                result = self.get_char(c, ctx=ctx)
                 if result is not None:
                     chars.append(result)
         return LookupResult(entries, chars)
diff --git a/jamdol-flask.py b/jamdol-flask.py
@@ -52,22 +52,22 @@
 from flask import Flask, Response
 from functools import wraps
 from flask import request
+
+from chirptext.cli import setup_logging
+
 from jamdict import Jamdict
 
 # ---------------------------------------------------------------------
 # CONFIGURATION
 # ---------------------------------------------------------------------
 
+setup_logging('logging.json', 'logs')
 app = Flask(__name__, static_url_path="")
-# Prefer to use jmdict.en
-DB_FILE = os.path.abspath('./data/jamdict.en.db')
-if not os.path.isfile(DB_FILE):
-    DB_FILE = os.path.abspath('./data/jamdict.db')
-jmd = Jamdict(db_file=DB_FILE)
+jmd = Jamdict()
 
 
-def get_logger():
-    logging.getLogger(__name__)
+def getLogger():
+    return logging.getLogger(__name__)
 
 
 # ---------------------------------------------------------------------
@@ -100,9 +100,11 @@ def get_entry(idseq):
 
 
 @app.route('/jamdol/search/<query>', methods=['GET'])
+@app.route('/jamdol/search/<strict>/<query>', methods=['GET'])
 @jsonp
-def search(query):
-    results = jmd.lookup(query)
+def search(query, strict=None):
+    getLogger().info("Query = {}".format(query))
+    results = jmd.lookup(query, strict_lookup=strict)
     return results.to_json()
 
 
diff --git a/logging.json b/logging.json
diff --git a/release.sh b/release.sh
diff --git a/requirements.txt b/requirements.txt
diff --git a/setup.py b/setup.py
diff --git a/test/test_jmdict_sqlite.py b/test/test_jmdict_sqlite.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1,2 @@` |
|  | `1` | `+from . import tools` |
|  | `2` | `+tools.main()` |