Skip to content

Commit

Permalink
Move MS-IME user dictionary importer to gui/base.
Browse files Browse the repository at this point in the history
This is part of a series of CLs to drop the dependency on iconv (#252).

MS-IME user dictionary importer is only used from src/gui/.
Moving it to src/gui/base/ makes build dependency simpler and possibly
improves build time depending on build configurations and build
targets.

BUG=#252
TEST=unittest
REF_BUG=19010851
REF_CL=87220744
  • Loading branch information
Noriyuki Takahashi authored and yukawa committed Nov 9, 2015
1 parent 1d3858f commit 8dfa193
Show file tree
Hide file tree
Showing 9 changed files with 320 additions and 253 deletions.
208 changes: 0 additions & 208 deletions src/dictionary/user_dictionary_importer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@

#ifdef OS_WIN
#include <windows.h>
#ifdef HAS_MSIME_HEADER
#include <msime.h>
#endif // HAS_MSIME_HEADER
#endif // OS_WIN

#include <algorithm>
Expand All @@ -43,7 +40,6 @@
#include <vector>

#include "base/compiler_specific.h"
#include "base/encoding_util.h"
#include "base/hash.h"
#include "base/logging.h"
#include "base/mmap.h"
Expand Down Expand Up @@ -173,210 +169,6 @@ bool ConvertEntryInternal(

} // namespace

#if defined(OS_WIN) && defined(HAS_MSIME_HEADER)
namespace {

// Number of IMEWRD slots in the buffer that MSIMEImportIterator passes to
// IFEDictionary::GetWords() / NextWords(); the byte size handed to those calls
// is kBufferSize * sizeof(IMEWRD).
const size_t kBufferSize = 256;

// ProgID of MS-IME Japanese.
// Resolved to a CLSID with ::CLSIDFromProgID() in CreateIFEDictionary().
const wchar_t kVersionIndependentProgIdForMSIME[] = L"MSIME.Japan";

// Interface identifier of user dictionary in MS-IME.
// {019F7153-E6DB-11d0-83C3-00C04FDDB82E}
// Passed to ::CoCreateInstance() as the requested interface.
const GUID kIidIFEDictionary = {
0x19f7153, 0xe6db, 0x11d0, {0x83, 0xc3, 0x0, 0xc0, 0x4f, 0xdd, 0xb8, 0x2e}
};

// Creates the MS-IME user dictionary COM object.
//
// Resolves the version-independent ProgID to a CLSID and then asks COM for an
// in-process instance exposing the IFEDictionary interface. Returns nullptr
// (after logging the HRESULT) when either step fails.
IFEDictionary *CreateIFEDictionary() {
  // On Windows 7 and prior, multiple versions of MS-IME can be installed
  // side-by-side. As far as we've observed, the latest version will be chosen
  // with version-independent ProgId.
  CLSID msime_class_id = GUID_NULL;
  const HRESULT lookup_result = ::CLSIDFromProgID(
      kVersionIndependentProgIdForMSIME, &msime_class_id);
  if (FAILED(lookup_result)) {
    LOG(ERROR) << "CLSIDFromProgID() failed: " << lookup_result;
    return nullptr;
  }

  IFEDictionary *dictionary = nullptr;
  void **const out_param = reinterpret_cast<void **>(&dictionary);
  const HRESULT create_result = ::CoCreateInstance(
      msime_class_id, nullptr, CLSCTX_INPROC_SERVER, kIidIFEDictionary,
      out_param);
  if (FAILED(create_result)) {
    LOG(ERROR) << "CoCreateInstance() failed: " << create_result;
    return nullptr;
  }

  VLOG(1) << "Can create IFEDictionary successfully";
  return dictionary;
}

// Scoped holder for an IFEDictionary pointer.
//
// Takes the pointer passed to the constructor (which may be null) and, on
// destruction, calls Close() followed by Release() on it when it is non-null.
// The holder is non-copyable: a copy would Close()/Release() the same object a
// second time.
class ScopedIFEDictionary {
 public:
  explicit ScopedIFEDictionary(IFEDictionary *dic)
      : dic_(dic) {}

  ScopedIFEDictionary(const ScopedIFEDictionary &) = delete;
  ScopedIFEDictionary &operator=(const ScopedIFEDictionary &) = delete;

  ~ScopedIFEDictionary() {
    if (dic_ != nullptr) {
      dic_->Close();
      dic_->Release();
    }
  }

  // Accessors to the held pointer. operator* and operator-> must not be used
  // when get() returns null.
  IFEDictionary &operator*() const { return *dic_; }
  IFEDictionary *operator->() const { return dic_; }
  IFEDictionary *get() const { return dic_; }

 private:
  IFEDictionary *dic_;
};

// Iterator for MS-IME user dictionary.
//
// The constructor opens the dictionary, builds a map from MS-IME POS ids to
// POS names (converted from Shift_JIS to UTF-8), and fetches the first batch
// of user-registered words. Next() then yields one entry per call and refills
// the buffer through NextWords() while MS-IME reports IFED_S_MORE_ENTRIES.
class MSIMEImportIterator
    : public UserDictionaryImporter::InputIteratorInterface {
 public:
  // Initializers are listed in member declaration order.
  MSIMEImportIterator()
      : buf_(kBufferSize),
        dic_(CreateIFEDictionary()),
        result_(E_FAIL), size_(0), index_(0) {
    if (dic_.get() == NULL) {
      LOG(ERROR) << "IFEDictionaryFactory returned NULL";
      return;
    }

    // open user dictionary
    // |result_| is intentionally left as E_FAIL on failure so that
    // IsAvailable() reports false.
    const HRESULT result = dic_->Open(NULL, NULL);
    if (S_OK != result) {
      LOG(ERROR) << "Cannot open user dictionary: " << result;
      return;
    }

    POSTBL *pos_table = NULL;
    int pos_size = 0;
    result_ = dic_->GetPosTable(&pos_table, &pos_size);
    if (S_OK != result_ || pos_table == NULL || pos_size == 0) {
      LOG(ERROR) << "Cannot get POS table: " << result_;
      result_ = E_FAIL;
      return;
    }

    // Build the POS id -> POS name map used by Next().
    string name;
    for (int i = 0; i < pos_size; ++i) {
      EncodingUtil::SJISToUTF8(
          reinterpret_cast<char *>(pos_table->szName), &name);
      pos_map_.insert(make_pair(pos_table->nPos, name));
      ++pos_table;
    }

    // extract all words registered by user.
    // Don't use auto-registered words, since Mozc may not be able to
    // handle auto_registered words correctly, and user is basically
    // unaware of auto-registered words.
    result_ = dic_->GetWords(NULL, NULL, NULL,
                             IFED_POS_ALL,
                             IFED_SELECT_ALL,
                             IFED_REG_USER,  // | FED_REG_AUTO
                             reinterpret_cast<UCHAR *>(&buf_[0]),
                             kBufferSize * sizeof(IMEWRD),
                             &size_);
  }

  // Returns true when the last MS-IME call reported either a complete result
  // (S_OK) or that more entries remain (IFED_S_MORE_ENTRIES).
  bool IsAvailable() const {
    return result_ == IFED_S_MORE_ENTRIES || result_ == S_OK;
  }

  // Advances the iterator and fills |entry|.
  //
  // Returns false when the iterator is unavailable, |entry| is NULL, all
  // entries have been consumed, or NextWords() fails. Returns true otherwise;
  // note that |entry| is left cleared when the current word is skipped (NULL
  // reading/display or unknown POS id) and on the call that refills the
  // buffer.
  //
  // NOTE: Without "UserDictionaryImporter::", Visual C++ 2008 somehow fails
  // to look up the type name.
  bool Next(UserDictionaryImporter::RawEntry *entry) {
    if (!IsAvailable()) {
      LOG(ERROR) << "Iterator is not available";
      return false;
    }

    if (entry == NULL) {
      LOG(ERROR) << "Entry is NULL";
      return false;
    }
    entry->Clear();

    if (index_ < size_) {
      const IMEWRD &word = buf_[index_];
      if (word.pwchReading == NULL || word.pwchDisplay == NULL) {
        ++index_;
        LOG(ERROR) << "pwchDisplay or pwchReading is NULL";
        return true;
      }

      // set key/value
      Util::WideToUTF8(word.pwchReading, &entry->key);
      Util::WideToUTF8(word.pwchDisplay, &entry->value);

      // set POS
      map<int, string>::const_iterator it = pos_map_.find(word.nPos1);
      if (it == pos_map_.end()) {
        // Log before advancing: reading buf_[index_] after the increment
        // would report the next entry and can index past the end of |buf_|.
        LOG(ERROR) << "Unknown POS id: " << word.nPos1;
        ++index_;
        entry->Clear();
        return true;
      }
      entry->pos = it->second;

      // set comment
      if (word.pvComment != NULL) {
        if (word.uct == IFED_UCT_STRING_SJIS) {
          EncodingUtil::SJISToUTF8(
              reinterpret_cast<const char *>(word.pvComment),
              &entry->comment);
        } else if (word.uct == IFED_UCT_STRING_UNICODE) {
          Util::WideToUTF8(
              reinterpret_cast<const wchar_t *>(word.pvComment),
              &entry->comment);
        }
      }
    }

    if (index_ < size_) {
      ++index_;
      return true;
    } else if (result_ == S_OK) {
      // The buffer is exhausted and MS-IME reported no further entries.
      return false;
    } else if (result_ == IFED_S_MORE_ENTRIES) {
      // Refill the buffer; the entries are handed out by subsequent calls.
      result_ = dic_->NextWords(reinterpret_cast<UCHAR *>(&buf_[0]),
                                kBufferSize * sizeof(IMEWRD),
                                &size_);
      if (result_ == E_FAIL) {
        LOG(ERROR) << "NextWords() failed";
        return false;
      }
      index_ = 0;
      return true;
    }

    return false;
  }

 private:
  vector<IMEWRD> buf_;         // Batch of words filled by GetWords/NextWords.
  ScopedIFEDictionary dic_;    // MS-IME dictionary; may hold NULL.
  map<int, string> pos_map_;   // MS-IME POS id -> POS name in UTF-8.
  HRESULT result_;             // Result of the last MS-IME call.
  ULONG size_;                 // Number of valid entries in |buf_|.
  ULONG index_;                // Index of the next entry to return.
};

} // namespace
#endif // OS_WIN && HAS_MSIME_HEADER

// Imports the MS-IME user dictionary into |user_dic|.
//
// When built with both OS_WIN and HAS_MSIME_HEADER, the import is delegated to
// ImportFromIterator() with an MSIMEImportIterator; otherwise
// IMPORT_NOT_SUPPORTED is returned.
UserDictionaryImporter::ErrorType UserDictionaryImporter::ImportFromMSIME(
    UserDictionary *user_dic) {
  DCHECK(user_dic);
#if defined(OS_WIN) && defined(HAS_MSIME_HEADER)
  MSIMEImportIterator msime_iterator;
  return ImportFromIterator(&msime_iterator, user_dic);
#else  // OS_WIN && HAS_MSIME_HEADER
  return IMPORT_NOT_SUPPORTED;
#endif  // OS_WIN && HAS_MSIME_HEADER
}

UserDictionaryImporter::ErrorType UserDictionaryImporter::ImportFromIterator(
InputIteratorInterface *iter, UserDictionary *user_dic) {
if (iter == NULL || user_dic == NULL) {
Expand Down
4 changes: 0 additions & 4 deletions src/dictionary/user_dictionary_importer.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,6 @@ class UserDictionaryImporter {
TextLineIteratorInterface *iter,
user_dictionary::UserDictionary *dic);

// Import a dictionary from MS-IME's user dictionary.
// Only available on Windows
static ErrorType ImportFromMSIME(user_dictionary::UserDictionary *dic);

private:
DISALLOW_IMPLICIT_CONSTRUCTORS(UserDictionaryImporter);
};
Expand Down
20 changes: 0 additions & 20 deletions src/dictionary/user_dictionary_importer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -688,26 +688,6 @@ TEST(UserDictionaryImporter, GuessEncodingTypeTest) {
}
}

// Checks that ImportFromMSIME() reports IMPORT_NOT_SUPPORTED on non-Windows
// builds. On Windows the call is only exercised; its result is not verified.
TEST(UserDictionaryImporter, ImportFromMSIMETest) {
  UserDictionaryStorage::UserDictionary dictionary;
  UserDictionaryImporter::ErrorType import_result =
      UserDictionaryImporter::ImportFromMSIME(&dictionary);

#ifndef OS_WIN
  EXPECT_EQ(UserDictionaryImporter::IMPORT_NOT_SUPPORTED, import_result);
#else
  // Currently the following tests are disabled since necessary components
  // are not available on the continuous build system.
  // See http://b/237578 for details.
  // TODO(yukawa): Arrange some automated tests instead of these tests.
  // http://b/2375839
  // EXPECT_NE(UserDictionaryImporter::IMPORT_CANNOT_OPEN_DICTIONARY,
  //           import_result);
  // EXPECT_NE(UserDictionaryImporter::IMPORT_FATAL, import_result);
  // EXPECT_NE(UserDictionaryImporter::IMPORT_UNKNOWN_ERROR, import_result);
#endif
}

TEST(UserDictionaryImporter, StringTextLineIterator) {
string line;
const char *kTestData[] = {
Expand Down
Loading

0 comments on commit 8dfa193

Please sign in to comment.