Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Number converters: convert more than one digit at a time #149

Merged
merged 1 commit into from
Nov 5, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions pythainlp/number/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,23 @@
Number conversions between Thai digits, Arabic digits, and Thai words
"""

from .thainum import (
bahttext,
num_to_thaiword,
from .digitconv import (
arabic_digit_to_thai_digit,
digit_to_text,
text_to_arabic_digit,
text_to_thai_digit,
thai_digit_to_arabic_digit,
)
from .numtoword import bahttext, num_to_thaiword
from .wordtonum import thaiword_to_num

__all__ = ["bahttext", "num_to_thaiword", "thaiword_to_num"]
# Public API of the package: the names bound by
# ``from pythainlp.number import *``. The first three are the number <-> word
# converters; the remaining five are the digit converters from ``.digitconv``.
__all__ = [
"bahttext",
"num_to_thaiword",
"thaiword_to_num",
"arabic_digit_to_thai_digit",
"digit_to_text",
"text_to_arabic_digit",
"text_to_thai_digit",
"thai_digit_to_arabic_digit",
]
132 changes: 132 additions & 0 deletions pythainlp/number/digitconv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
"""
Convert digits
"""

# Arabic digit -> Thai digit. This table is the single source of truth for
# the digit pairs; the reverse table below is derived from it so the two can
# never drift apart.
_arabic_thai = {
    "0": "๐",
    "1": "๑",
    "2": "๒",
    "3": "๓",
    "4": "๔",
    "5": "๕",
    "6": "๖",
    "7": "๗",
    "8": "๘",
    "9": "๙",
}

# Thai digit -> Arabic digit (inverse of ``_arabic_thai``).
_thai_arabic = {thai: arabic for arabic, thai in _arabic_thai.items()}

# Arabic digit -> the Thai word that spells the digit out.
_digit_spell = {
    "0": "ศูนย์",
    "1": "หนึ่ง",
    "2": "สอง",
    "3": "สาม",
    "4": "สี่",
    "5": "ห้า",
    "6": "หก",
    "7": "เจ็ด",
    "8": "แปด",
    "9": "เก้า",
}

# Thai word -> Arabic digit (inverse of ``_digit_spell``).
_spell_digit = {word: digit for digit, word in _digit_spell.items()}

# Translation tables for ``str.translate``, built once at import time so the
# converters do a single pass over the text instead of a per-character loop.
_THAI_TO_ARABIC_TABLE = str.maketrans(_thai_arabic)
_ARABIC_TO_THAI_TABLE = str.maketrans(_arabic_thai)

# Spelling table that accepts both Arabic and Thai digits as keys.
_any_digit_spell = dict(_digit_spell)
_any_digit_spell.update(
    (thai, _digit_spell[arabic]) for thai, arabic in _thai_arabic.items()
)
_DIGIT_TO_TEXT_TABLE = str.maketrans(_any_digit_spell)


def thai_digit_to_arabic_digit(text):
    """
    Replace every Thai digit in a text with the matching Arabic digit.

    :param str text: Text with Thai digits such as '๑', '๒', '๓'
    :return: Text with Thai digits being converted to Arabic digits
             such as '1', '2', '3'. All other characters are kept as they
             are. Empty or None input gives an empty string.
    """
    if not text:
        return ""

    return text.translate(_THAI_TO_ARABIC_TABLE)


def arabic_digit_to_thai_digit(text):
    """
    Replace every Arabic digit in a text with the matching Thai digit.

    :param str text: Text with Arabic digits such as '1', '2', '3'
    :return: Text with Arabic digits being converted to Thai digits
             such as '๑', '๒', '๓'. All other characters are kept as they
             are. Empty or None input gives an empty string.
    """
    if not text:
        return ""

    return text.translate(_ARABIC_TO_THAI_TABLE)


def digit_to_text(text):
    """
    Spell out every digit in a text, one digit at a time, in Thai.

    Both Arabic and Thai digits are recognised. Digits are spelled
    individually ('25' becomes 'สองห้า'); no place values are produced.

    :param str text: Text with digits such as '1', '2', '๓', '๔'
    :return: Text with digits being spelled out in Thai. All other
             characters are kept as they are. Empty or None input gives an
             empty string.
    """
    if not text:
        return ""

    return text.translate(_DIGIT_TO_TEXT_TABLE)


def text_to_arabic_digit(text):
    """
    Convert a single Thai digit word to its Arabic digit.

    :param text: A digit spelled out in Thai, e.g. 'เจ็ด'. The whole argument
                 must be exactly one digit word.
    :return: An Arabic digit such as '1', '2', '3', or an empty string when
             the input is empty, None, or not a known digit word.
    """
    if not text:
        return ""

    return _spell_digit.get(text, "")


def text_to_thai_digit(text):
    """
    Convert a single Thai digit word to its Thai digit.

    :param text: A digit spelled out in Thai, e.g. 'เก้า'
    :return: A Thai digit such as '๑', '๒', '๓', or an empty string when
             the input is empty, None, or not a known digit word.
    """
    return arabic_digit_to_thai_digit(text_to_arabic_digit(text))
70 changes: 1 addition & 69 deletions pythainlp/number/thainum.py → pythainlp/number/numtoword.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Convert Thai numbers
Convert number value to Thai read out

Adapted from
http://justmindthought.blogspot.com/2012/12/code-php.html
Expand All @@ -9,74 +9,6 @@

__all__ = ["bahttext", "num_to_thaiword"]

# Digit table. Columns: Thai word, Arabic digit, Thai digit.
# The first row is a header (its labels mean "Thai language", "digit",
# "Thai numeral") and is skipped when the lookups are built.
# NOTE(review): there is no row for zero, so '0', '๐' and 'ศูนย์' raise
# KeyError in every converter below. A zero row was deliberately not added
# because it would change the row indexes of this table.
_p = [
    ["ภาษาไทย", "ตัวเลข", "เลขไทย"],
    ["หนึ่ง", "1", "๑"],
    ["สอง", "2", "๒"],
    ["สาม", "3", "๓"],
    ["สี่", "4", "๔"],
    ["ห้า", "5", "๕"],
    ["หก", "6", "๖"],
    # Fixed: this row used to repeat "หก" (six), which made 7 read as six
    # and made "หก" map back to 7 instead of 6.
    ["เจ็ด", "7", "๗"],
    ["แปด", "8", "๘"],
    ["เก้า", "9", "๙"],
]

# Column positions inside a ``_p`` row.
_COL_WORD = 0
_COL_ARABIC = 1
_COL_THAI = 2


def _column_map(key_col, value_col):
    """Map one column of ``_p`` to another, skipping the header row."""
    return {row[key_col]: row[value_col] for row in _p[1:]}


# Lookups are built once at import time instead of on every call.
_THAI_NUM_TO_NUM = _column_map(_COL_THAI, _COL_ARABIC)
_THAI_NUM_TO_TEXT = _column_map(_COL_THAI, _COL_WORD)
_NUM_TO_THAI_NUM = _column_map(_COL_ARABIC, _COL_THAI)
_NUM_TO_TEXT = _column_map(_COL_ARABIC, _COL_WORD)
_TEXT_TO_NUM = _column_map(_COL_WORD, _COL_ARABIC)
_TEXT_TO_THAI_NUM = _column_map(_COL_WORD, _COL_THAI)


# Thai digit to Arabic digit
def thai_num_to_num(text):
    """
    Convert a single Thai digit to an Arabic digit.

    :param str text: Thai number characters such as '๑', '๒', '๓'
    :return: universal numbers such as '1', '2', '3'
    :raises KeyError: if ``text`` is not one of the Thai digits in the table
    """
    return _THAI_NUM_TO_NUM[text]


def thai_num_to_text(text):
    """
    Convert a single Thai digit to its Thai word.

    :param str text: Thai number characters such as '๑', '๒', '๓'
    :return: Thai numbers, spelled out in Thai
    :raises KeyError: if ``text`` is not one of the Thai digits in the table
    """
    return _THAI_NUM_TO_TEXT[text]


def num_to_thai_num(text):
    """
    Convert a single Arabic digit to a Thai digit.

    :param text: universal numbers such as '1', '2', '3'
    :return: Thai number characters such as '๑', '๒', '๓'
    :raises KeyError: if ``text`` is not one of the Arabic digits in the table
    """
    return _NUM_TO_THAI_NUM[text]


def num_to_text(text):
    """
    Convert a single Arabic digit to its Thai word.

    :param text: universal numbers such as '1', '2', '3'
    :return: Thai numbers, spelled out in Thai
    :raises KeyError: if ``text`` is not one of the Arabic digits in the table
    """
    return _NUM_TO_TEXT[text]


def text_to_num(text):
    """
    Convert a single Thai digit word to an Arabic digit.

    :param text: Thai numbers, spelled out in Thai
    :return: universal numbers such as '1', '2', '3'
    :raises KeyError: if ``text`` is not one of the digit words in the table
    """
    return _TEXT_TO_NUM[text]


def text_to_thai_num(text):
    """
    Convert a single Thai digit word to a Thai digit.

    :param text: Thai numbers, spelled out in Thai
    :return: Thai numbers such as '๑', '๒', '๓'
    :raises KeyError: if ``text`` is not one of the digit words in the table
    """
    return _TEXT_TO_THAI_NUM[text]


def bahttext(number):
"""
Expand Down
2 changes: 1 addition & 1 deletion pythainlp/number/wordtonum.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
Convert number in words to a computablenumber value
Convert number in words to a computable number value

Adapted from Korakot Chaovavanich's notebook
https://colab.research.google.com/drive/148WNIeclf0kOU6QxKd6pcfwpSs8l-VKD#scrollTo=EuVDd0nNuI8Q
Expand Down
31 changes: 30 additions & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@
from pythainlp.keywords import find_keyword
from pythainlp.ner import ThaiNameRecognizer
from pythainlp.ner.locations import tag_provinces
from pythainlp.number import bahttext, num_to_thaiword, thaiword_to_num
from pythainlp.number import (
arabic_digit_to_thai_digit,
bahttext,
digit_to_text,
num_to_thaiword,
text_to_arabic_digit,
text_to_thai_digit,
thai_digit_to_arabic_digit,
thaiword_to_num,
)
from pythainlp.rank import rank
from pythainlp.romanization import romanize
from pythainlp.sentiment import sentiment
Expand Down Expand Up @@ -172,6 +181,26 @@ def test_number(self):
self.assertEqual(thaiword_to_num(""), None)
self.assertEqual(thaiword_to_num(None), None)

self.assertEqual(arabic_digit_to_thai_digit("ไทยแลนด์ 4.0"), "ไทยแลนด์ ๔.๐")
self.assertEqual(arabic_digit_to_thai_digit(""), "")
self.assertEqual(arabic_digit_to_thai_digit(None), "")

self.assertEqual(thai_digit_to_arabic_digit("๔๐๔ Not Found"), "404 Not Found")
self.assertEqual(thai_digit_to_arabic_digit(""), "")
self.assertEqual(thai_digit_to_arabic_digit(None), "")

self.assertEqual(digit_to_text("RFC 7258"), "RFC เจ็ดสองห้าแปด")
self.assertEqual(digit_to_text(""), "")
self.assertEqual(digit_to_text(None), "")

self.assertEqual(text_to_arabic_digit("เจ็ด"), "7")
self.assertEqual(text_to_arabic_digit(""), "")
self.assertEqual(text_to_arabic_digit(None), "")

self.assertEqual(text_to_thai_digit("เก้า"), "๙")
self.assertEqual(text_to_thai_digit(""), "")
self.assertEqual(text_to_thai_digit(None), "")

# ### pythainlp.rank

def test_rank(self):
Expand Down