Skip to content

Commit

Permalink
Remove the engine name normalization step, as suggested by @wannaphongcom
Browse files Browse the repository at this point in the history
  • Loading branch information
bact committed Oct 13, 2018
1 parent 9a03963 commit bc74d15
Showing 1 changed file with 0 additions and 2 deletions.
2 changes: 0 additions & 2 deletions pythainlp/tokenize/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def dict_word_tokenize(text, custom_dict_trie, engine="newmm"):
>>> dict_word_tokenize("แมวดีดีแมว",data_dict)
['แมว', 'ดี', 'ดี', 'แมว']
"""
- engine = engine.strip().lower()
if engine == "newmm" or engine == "onecut":
from .newmm import mmcut as segment
elif engine == "mm" or engine == "multi_cut":
Expand All @@ -104,7 +103,6 @@ def sent_tokenize(text, engine="whitespace+newline"):
:return: a list of text, split by whitespace or new line.
"""
- engine = engine.strip().lower()
if engine == "whitespace":
sentences = nltk.tokenize.WhitespaceTokenizer().tokenize(text)
else:
Expand Down

0 comments on commit bc74d15

Please sign in to comment.