From cbf7226a1f0e9a2948d0517c359978b026fd802f Mon Sep 17 00:00:00 2001 From: Hake Huang Date: Tue, 28 Feb 2017 15:37:59 +0800 Subject: [PATCH 1/2] fix sqrt issue when only one char varies in the training data Signed-off-by: Hake Huang --- lib/classifier-reborn/extensions/hasher.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/classifier-reborn/extensions/hasher.rb b/lib/classifier-reborn/extensions/hasher.rb index c1bf1de..a6b4c5f 100644 --- a/lib/classifier-reborn/extensions/hasher.rb +++ b/lib/classifier-reborn/extensions/hasher.rb @@ -27,7 +27,7 @@ def clean_word_hash(str, language = 'en', enable_stemmer = true) def word_hash_for_words(words, language = 'en', enable_stemmer = true) d = Hash.new(0) words.each do |word| - next unless word.length > 2 && !STOPWORDS[language].include?(word) + next unless word.length > 0 && !STOPWORDS[language].include?(word) if enable_stemmer d[word.stem.intern] += 1 else From ecfca6a6938c76d229187ded1f6ba004b7f55bf1 Mon Sep 17 00:00:00 2001 From: Hake Huang Date: Wed, 1 Mar 2017 13:00:11 +0800 Subject: [PATCH 2/2] update test cases for the 1-byte fix Signed-off-by: Hake Huang --- test/bayes/bayesian_common_tests.rb | 4 ++-- test/extensions/hasher_test.rb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/bayes/bayesian_common_tests.rb b/test/bayes/bayesian_common_tests.rb index 902ba92..c007e3f 100644 --- a/test/bayes/bayesian_common_tests.rb +++ b/test/bayes/bayesian_common_tests.rb @@ -139,10 +139,10 @@ def test_skip_empty_training_and_classification classifier.train('Ruby', '') assert classifier.categories.empty? classifier.train('Ruby', 'To be or not to be') - assert classifier.categories.empty? + refute classifier.categories.empty? classifier.train('Ruby', 'A really sweet language') refute classifier.categories.empty? 
- assert_equal Float::INFINITY, classifier.classify_with_score('To be or not to be')[1] + assert_equal Float::INFINITY, classifier.classify_with_score('')[1] end def test_empty_string_stopwords diff --git a/test/extensions/hasher_test.rb b/test/extensions/hasher_test.rb index 336a8b7..c362d41 100644 --- a/test/extensions/hasher_test.rb +++ b/test/extensions/hasher_test.rb @@ -56,7 +56,7 @@ def test_add_custom_stopword_path temp_stopwords_name = File.basename(temp_stopwords.path) Hasher.add_custom_stopword_path(temp_stopwords_path) - hash = { list: 1, cool: 1 } + hash = {:is=>1, :a=>1, :list=>1, :of=>1, :cool=>1} assert_equal hash, Hasher.clean_word_hash("this is a list of cool words!", temp_stopwords_name) end