Skip to content

Commit

Permalink
fixed issue in bug/#42
Browse files Browse the repository at this point in the history
  • Loading branch information
Kensuke-Mitsuzawa committed Feb 27, 2018
1 parent 5122e20 commit 8c8afef
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 7 deletions.
15 changes: 13 additions & 2 deletions JapaneseTokenizer/common/text_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
import six
import re
import unicodedata
import neologdn
from JapaneseTokenizer import init_logger
import logging
logger = init_logger.init_logger(logging.getLogger(init_logger.LOGGER_NAME))
__author__ = 'kensuke-mi'

if six.PY2:
Expand All @@ -20,6 +22,13 @@ def u(str): return str
def b(str): return str.encode("utf-8")
pass

# neologdn is an optional dependency: record whether it could be imported so
# that callers can check `is_neologdn_valid` before using `neologdn.normalize`.
try:
    import neologdn
    is_neologdn_valid = True
except ImportError:
    # Only a failed import means "not installed"; any other error should
    # surface instead of being reported as a missing package.
    logger.warning("neologdn package is not installed yet. You could not call neologd dictionary.")
    is_neologdn_valid = False

STRING_EXCEPTION = set([u('*')])


Expand Down Expand Up @@ -57,8 +66,10 @@ def normalize_text(input_text,
else:
without_new_line = new_line_replaced

if dictionary_mode=='neologd':
if dictionary_mode=='neologd' and is_neologdn_valid:
return neologdn.normalize(normalize_text_normal_ipadic(without_new_line))
elif dictionary_mode=='neologd' and is_neologdn_valid == False:
raise Exception("You could not call neologd dictionary bacause you do NOT install the package neologdn.")
else:
return normalize_text_normal_ipadic(without_new_line, kana=is_kana, ascii=is_ascii, digit=is_digit)

Expand Down
12 changes: 10 additions & 2 deletions JapaneseTokenizer/mecab_wrapper/mecab_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import os
import logging
import subprocess
import neologdn
import six
from six import text_type
# typing
Expand All @@ -22,6 +21,13 @@
logger = init_logger.init_logger(logging.getLogger(init_logger.LOGGER_NAME))
python_version = sys.version_info

# neologdn is an optional dependency: record whether it could be imported so
# that the tokenizer can check `is_neologdn_valid` before calling
# `neologdn.normalize`.
try:
    import neologdn
    is_neologdn_valid = True
except ImportError:
    # Only a failed import means "not installed"; any other error should
    # surface instead of being reported as a missing package.
    logger.warning("neologdn package is not installed yet. You could not call neologd dictionary.")
    is_neologdn_valid = False


class MecabWrapper(WrapperBase):
def __init__(self, dictType, pathUserDictCsv='', path_mecab_config=None, string_encoding='utf-8'):
Expand Down Expand Up @@ -243,8 +249,10 @@ def tokenize(self, sentence,
pass

### decide normalization function depending on dictType
if func_normalizer is None and self._dictType == 'neologd':
if func_normalizer is None and self._dictType == 'neologd' and is_neologdn_valid:
normalized_sentence = neologdn.normalize(sentence)
elif func_normalizer is None and self._dictType == 'neologd' and is_neologdn_valid == False:
raise Exception("You could not call neologd dictionary bacause you do NOT install the package neologdn.")
elif func_normalizer == normalize_text:
normalized_sentence = normalize_text(sentence, dictionary_mode=self._dictType)
elif func_normalizer is None:
Expand Down
15 changes: 12 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,26 @@
except ImportError:
logger.error('We failed to install pyknp automatically. Try installing pyknp manually.')
# --------------------------------------------------------------------------------------------------------
# Best-effort installation of the optional `neologdn` package: if it is not
# importable, try to install it through pip and import it again. A failure
# here is only logged so that the rest of the setup can continue.
try:
    import neologdn
except ImportError:
    try:
        # NOTE(review): `pip.main` is not a supported public API and may be
        # missing in newer pip releases; the resulting error is caught below.
        pip.main(['install', 'neologdn'])
        import neologdn
    except (Exception, SystemExit):
        # SystemExit is caught as well so a pip-initiated exit cannot abort
        # the setup; KeyboardInterrupt is deliberately left to propagate.
        logger.error('We failed to install neologdn automatically because of some issues in the package. Try installing neologdn manually.')
# --------------------------------------------------------------------------------------------------------

if python_version >= (3, 0, 0):
logger.info(msg='python={}'.format(python_version))
install_requires = ['pypandoc', 'future', 'six', 'mecab-python3', 'jaconv>=0.2', 'pip>=8.1.0', 'typing', 'neologdn', 'pexpect']
install_requires = ['pypandoc', 'future', 'six', 'mecab-python3', 'jaconv>=0.2', 'pip>=8.1.0', 'typing', 'pexpect']
elif python_version <= (2, 9, 9):
logger.info(msg='python={}'.format(python_version))
install_requires = ['pypandoc', 'future', 'six', 'mecab-python', 'jaconv>=0.2', 'pip>=8.1.0', 'typing', 'neologdn', 'pexpect']
install_requires = ['pypandoc', 'future', 'six', 'mecab-python', 'jaconv>=0.2', 'pip>=8.1.0', 'typing', 'pexpect']
else:
raise NotImplementedError()

version = '1.3.6'
version = '1.3.7'
name = 'JapaneseTokenizer'
short_description = '`JapaneseTokenizer` is a package for easy Japanese Tokenization'

Expand Down

0 comments on commit 8c8afef

Please sign in to comment.