
Manage extra requires + merge g2p and romanization to one transliterate module #153

Merged: 14 commits, Nov 9, 2018
5 changes: 2 additions & 3 deletions .travis.yml
@@ -3,12 +3,11 @@

language: python
python:
- "3.4"
- "3.5"
- "3.6"
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
install:
- pip install -r requirements-travis.txt
- pip install -r requirements.txt
- pip install .[icu,ner,pos,tokenize,transliterate]
- pip install coveralls

os:
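The CI now installs the package with its optional extras instead of from a requirements file. For orientation, here is a minimal sketch of how such `extras_require` groups might be declared in `setup.py`; the group-to-package mapping below is an assumption for illustration, not taken from this diff.

```python
# setup.py (illustrative sketch -- the real dependency lists may differ)
from setuptools import setup, find_packages

setup(
    name="pythainlp",
    version="1.8.0",
    packages=find_packages(),
    extras_require={
        "icu": ["pyicu"],              # pip install pythainlp[icu]
        "ner": ["sklearn-crfsuite"],   # used by pythainlp.ner
        "pos": ["artagger"],           # assumed mapping
        "tokenize": ["deepcut"],       # used by pythainlp.tokenize.deepcut
        "transliterate": ["pyicu"],    # assumed mapping; the "pyicu" engine needs PyICU
    },
)
```

With groups declared like this, `pip install .[icu,ner,pos,tokenize,transliterate]` pulls in every optional dependency the test suite exercises in one step.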
4 changes: 2 additions & 2 deletions README-pypi.md
@@ -1,6 +1,6 @@
![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4)

# PyThaiNLP 1.7
# PyThaiNLP 1.8.0

[![Codacy Badge](https://api.codacy.com/project/badge/Grade/cb946260c87a4cc5905ca608704406f7)](https://www.codacy.com/app/pythainlp/pythainlp_2?utm_source=github.com&utm_medium=referral&utm_content=PyThaiNLP/pythainlp&utm_campaign=Badge_Grade)[![pypi](https://img.shields.io/pypi/v/pythainlp.svg)](https://pypi.python.org/pypi/pythainlp)
[![Build Status](https://travis-ci.org/PyThaiNLP/pythainlp.svg?branch=develop)](https://travis-ci.org/PyThaiNLP/pythainlp)
@@ -14,7 +14,7 @@ PyThaiNLP features include Thai word and subword segmentations, soundex, romaniz

## What's new in version 1.7?

- Deprecate Python 2 support
- Deprecate Python 2 support. (Python 2 compatibility code will be completely dropped in PyThaiNLP 1.8)
- Refactor pythainlp.tokenize.pyicu for readability
- Add Thai NER model to pythainlp.ner
- thai2vec v0.2 - larger vocab, benchmarking results on Wongnai dataset
4 changes: 2 additions & 2 deletions README.md
@@ -21,7 +21,7 @@ Python 2 users can still use PyThaiNLP 1.6.
## Capabilities

- Thai word segmentation (```word_tokenize```), including subword segmentation based on Thai Character Cluster (```tcc```) and ETCC (```etcc```)
- Thai romanization (```romanize```)
- Thai romanization and transliteration (```romanize```, ```transliterate```)
- Thai part-of-speech taggers (```pos_tag```)
- Read out number to Thai words (```bahttext```, ```num_to_thaiword```)
- Thai collation (sort by dictionary order) (```collate```)
@@ -85,7 +85,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนเพื่
## Capabilities

- Thai word segmentation (```word_tokenize```), with support for Thai Character Clusters (```tcc```) and ETCC (```etcc```)
- Thai romanization to Latin script (```romanize```)
- Thai romanization to Latin script and phonetic transliteration (```romanize```, ```transliterate```)
- Thai part-of-speech tagging (```pos_tag```)
- Reading numbers out as Thai text (```bahttext```, ```num_to_thaiword```)
- Thai dictionary-order collation (```collate```)
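For quick reference, a minimal usage sketch of the two renamed entry points; the `'maew'` output follows the example in the project docs further down, and the `transliterate` output depends on the installed ICU backend:

```python
from pythainlp.transliterate import romanize, transliterate

print(romanize("แมว"))                       # 'maew', per the docs example below
print(transliterate("แมว", engine="pyicu"))  # phonetic form; requires the [icu] extra
```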
7 changes: 1 addition & 6 deletions appveyor.yml
@@ -2,11 +2,6 @@ build: off

environment:
matrix:
- PYTHON: "C:/Python34"
PYTHON_VERSION: "3.4"
PYTHON_ARCH: "32"
PYICU_WHEEL: "https://get.openlp.org/win-sdk/PyICU-1.9.5-cp34-cp34m-win32.whl"

- PYTHON: "C:/Python36"
PYTHON_VERSION: "3.6"
PYTHON_ARCH: "32"
@@ -37,7 +32,7 @@ install:
# - "set ICU_VERSION=62"
- "%PYTHON%/python.exe -m pip install --upgrade pip"
- "%PYTHON%/python.exe -m pip install %PYICU_WHEEL%"
- "%PYTHON%/python.exe -m pip install -e ."
- "%PYTHON%/python.exe -m pip install -e .[icu,ner,pos,tokenize,transliterate]"

test_script:
- "%PYTHON%/python.exe -m pip --version"
8 changes: 4 additions & 4 deletions docs/api/romanization.rst
@@ -1,10 +1,10 @@
.. currentmodule:: pythainlp.romanization

pythainlp.romanization
pythainlp.transliterate
====================================
The :class:`pythainlp.romanization` turns thai text into a romanized one (put simply, spelled with English).
The :class:`pythainlp.transliterate` turns Thai text into a romanized one (put simply, spelled with English).

.. autofunction:: romanization
.. currentmodule:: pythainlp.romanization.thai2rom
.. autofunction:: transliterate
.. currentmodule:: pythainlp.transliterate.thai2rom
.. autoclass:: thai2rom
:members: romanize
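The reST above documents a `thai2rom` class with a `romanize` member under the new `pythainlp.transliterate.thai2rom` module path. A hedged sketch of how that class-based interface would be used; the no-argument constructor is an assumption, since the diff does not show it:

```python
from pythainlp.transliterate.thai2rom import thai2rom

# Instantiate the romanizer and call its documented member.
r = thai2rom()           # constructor arguments, if any, are not shown in this diff
print(r.romanize("แมว"))
```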
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -29,7 +29,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '1.7'
release = '1.8.0'


# -- General configuration ---------------------------------------------------
8 changes: 5 additions & 3 deletions docs/pythainlp-dev-thai.md
@@ -256,12 +256,13 @@ lentext คือ จำนวนคำขั้นต่ำที่ต้อ

Returns a dict

### romanization
### transliteration

```python
from pythainlp.romanization import romanize
from pythainlp.transliterate import romanize, transliterate

romanize(str, engine="royin")
transliterate(str, engine="pyicu")
```

The following engines are available:
@@ -275,9 +276,10 @@
**Example**

```python
from pythainlp.romanization import romanize
from pythainlp.transliterate import romanize, transliterate

romanize("แมว") # 'maew'
transliterate("นก")
```

### spell
5 changes: 0 additions & 5 deletions examples/romanization.py

This file was deleted.

6 changes: 6 additions & 0 deletions examples/transliterate.py
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-

from pythainlp.transliterate import romanize, transliterate

print(romanize("แมว"))
print(transliterate("แมว"))
4 changes: 2 additions & 2 deletions pythainlp/__init__.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

__version__ = 1.7
__version__ = 1.8

thai_alphabets = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
thai_vowels = "ฤฦะ\u0e31าำ\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39เแโใไ\u0e45\u0e47" # 19
@@ -24,7 +24,7 @@

from pythainlp.collation import collate
from pythainlp.date import now
from pythainlp.romanization import romanize
from pythainlp.transliterate import romanize, transliterate
from pythainlp.sentiment import sentiment
from pythainlp.soundex import soundex
from pythainlp.spell import spell
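Because `__init__.py` re-exports the new functions, both remain importable from the package root; a quick sketch:

```python
import pythainlp

print(pythainlp.__version__)           # 1.8 (note: stored as a number, not a string)
print(pythainlp.romanize("แมว"))       # re-exported from pythainlp.transliterate
print(pythainlp.transliterate("แมว"))
```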
2 changes: 1 addition & 1 deletion pythainlp/corpus/tnc.py
@@ -40,7 +40,7 @@ def word_freq(word, domain="all"):

r = requests.post(url, data=data)

pat = re.compile('TOTAL</font>(?s).*?#ffffff">(.*?)</font>')
pat = re.compile(r'TOTAL</font>(?s).*?#ffffff">(.*?)</font>')
match = pat.search(r.text)

n = 0
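The only change here is the `r` prefix on the pattern. A small, unrelated illustration of what raw strings buy you in regexes: backslashes reach the regex engine untouched instead of being interpreted as Python string escapes first.

```python
import re

print(re.search(r"\d+", "word 42").group())  # '42' -- \d survives as a regex escape
print(len("\t"), len(r"\t"))                 # 1 2  -- "\t" is a tab, r"\t" is two characters
```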
21 changes: 0 additions & 21 deletions pythainlp/g2p/__init__.py

This file was deleted.

13 changes: 1 addition & 12 deletions pythainlp/ner/__init__.py
@@ -4,23 +4,12 @@
"""
__all__ = ["ThaiNameRecognizer"]

import sklearn_crfsuite
from pythainlp.corpus import download, get_file, thai_stopwords
from pythainlp.tag import pos_tag
from pythainlp.tokenize import word_tokenize
from pythainlp.util import is_thaiword

try:
import sklearn_crfsuite
except ImportError:
from pythainlp.tools import install_package

install_package("sklearn-crfsuite")
try:
import sklearn_crfsuite
except ImportError:
raise ImportError("ImportError: Try 'pip install sklearn-crfsuite'")


_WORD_TOKENIZER = "newmm"  # word tokenizer
_STOPWORDS = thai_stopwords()

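With the install-on-import fallback removed, `sklearn_crfsuite` is now a plain import that the `ner` extra is expected to satisfy. If a friendlier failure message is wanted, a caller-side guard along these lines would do (a sketch, not part of this PR):

```python
try:
    import sklearn_crfsuite  # provided by: pip install pythainlp[ner]
except ImportError as exc:
    raise ImportError(
        "sklearn-crfsuite is required for pythainlp.ner; "
        "install it with 'pip install pythainlp[ner]'"
    ) from exc
```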
26 changes: 0 additions & 26 deletions pythainlp/romanization/__init__.py

This file was deleted.

19 changes: 0 additions & 19 deletions pythainlp/romanization/pyicu.py

This file was deleted.

37 changes: 6 additions & 31 deletions pythainlp/sentiment/ulmfit_sent.py
@@ -5,40 +5,15 @@
"""
from collections import defaultdict

import dill as pickle
import numpy as np
import torch
from pythainlp.corpus import download, get_file
from pythainlp.tokenize import word_tokenize
from torch import LongTensor
from torch.autograd import Variable

try:
import numpy as np
import dill as pickle
except ImportError:
from pythainlp.tools import install_package

install_package("numpy")
install_package("dill")
try:
import numpy as np
import dill as pickle
except ImportError:
raise ImportError("ImportError: Try 'pip install numpy dill'")

try:
import torch
from torch import LongTensor
from torch.autograd import Variable
except ImportError:
print("PyTorch required. See https://pytorch.org/.")

# try:
# from fastai.text import multiBatchRNN
# except ImportError:
# print(
# """
# fastai required for multiBatchRNN.
# Run 'pip install https://github.com/fastai/fastai/archive/master.zip'
# """
# )

# from fastai.text import multiBatchRNN

MODEL_NAME = "sent_model"
ITOS_NAME = "itos_sent"
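The sentiment module now imports dill, numpy, and torch at module load instead of installing them on the fly. Calling it is unchanged; a minimal sketch using the top-level helper re-exported in `pythainlp/__init__.py` (default engine and return format are not shown in this diff):

```python
from pythainlp.sentiment import sentiment

# "อร่อยมาก" means "very delicious"; the label returned depends on the
# default engine and model, which this diff does not show.
print(sentiment("อร่อยมาก"))
```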
14 changes: 1 addition & 13 deletions pythainlp/tag/__init__.py
@@ -25,19 +25,7 @@ def pos_tag(words, engine="unigram", corpus="orchid"):
elif engine == "artagger":

def _tag(text, corpus=None):
try:
from artagger import Tagger
except ImportError:
from pythainlp.tools import install_package

install_package(_ARTAGGER_URL)
try:
from artagger import Tagger
except ImportError:
raise ImportError(
"ImportError: Try 'pip install " + _ARTAGGER_URL + "'"
)

from artagger import Tagger
words = Tagger().tag(" ".join(text))

return [(word.word, word.tag) for word in words]
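With the install-on-import fallback gone, the artagger branch simply imports `Tagger` and assumes the package was installed beforehand (its pip URL is kept in `_ARTAGGER_URL`). A usage sketch based on the `pos_tag` signature shown in the hunk header:

```python
from pythainlp.tag import pos_tag
from pythainlp.tokenize import word_tokenize

words = word_tokenize("ฉันรักภาษาไทย")    # "I love the Thai language"
print(pos_tag(words))                      # defaults per the signature: unigram tagger, orchid corpus
print(pos_tag(words, engine="artagger"))   # requires artagger to be installed separately
```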
12 changes: 1 addition & 11 deletions pythainlp/tokenize/deepcut.py
@@ -3,17 +3,7 @@
Wrapper for deepcut Thai word segmentation
"""

try:
import deepcut
except ImportError:
"""In case deepcut is not yet installed on the system"""
from pythainlp.tools import install_package

install_package("deepcut")
try:
import deepcut
except ImportError:
raise ImportError("ImportError: Try 'pip install deepcut'")
import deepcut


def segment(text):
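deepcut likewise becomes a hard import, expected to be satisfied by the `tokenize` extra. A hedged usage sketch; the `engine="deepcut"` name follows PyThaiNLP's tokenizer convention and is assumed rather than shown in this diff:

```python
from pythainlp.tokenize import word_tokenize

# Assumes deepcut is installed, e.g. via: pip install pythainlp[tokenize]
print(word_tokenize("ตัดคำภาษาไทย", engine="deepcut"))
```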
13 changes: 2 additions & 11 deletions pythainlp/tokenize/pyicu.py
@@ -4,20 +4,11 @@
"""
import re

try:
import icu
except ImportError:
from pythainlp.tools import install_package

install_package("pyicu")
try:
import icu
except ImportError:
raise ImportError("ImportError: Try 'pip install pyicu'")
from icu import BreakIterator, Locale


def _gen_words(text):
bd = icu.BreakIterator.createWordInstance(icu.Locale("th"))
bd = BreakIterator.createWordInstance(Locale("th"))
bd.setText(text)
p = bd.first()
for q in bd:
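For completeness, a self-contained sketch of the ICU word-break logic shown above; it runs outside PyThaiNLP as long as PyICU is installed (for example via the `icu` extra):

```python
from icu import BreakIterator, Locale


def icu_words(text):
    """Yield word fragments using ICU's Thai break iterator."""
    bd = BreakIterator.createWordInstance(Locale("th"))
    bd.setText(text)
    p = bd.first()
    for q in bd:  # iterating yields successive break positions
        yield text[p:q]
        p = q


print(list(icu_words("ฉันรักภาษาไทย")))
```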