Skip to content

Commit

Permalink
added khaiii to disambiguate benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
bab2min committed Apr 13, 2024
1 parent c160f3e commit e8855b9
Showing 1 changed file with 14 additions and 1 deletion.
15 changes: 14 additions & 1 deletion benchmark/disambiguate/disambiguate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def from_name(name, kiwi_model_path=None, kiwi_model_type='knlm'):
if name == 'hannanum': return HannanumModel()
if name == 'mecab': return MecabModel()
if name == 'okt': return OktModel()
if name == 'khaiii': return KhaiiiModel()

def _convert(self, morph):
    """Placeholder conversion hook meant to be overridden.

    This implementation never returns: it unconditionally raises
    ``NotImplementedError`` regardless of ``morph``.
    """
    raise NotImplementedError
Expand Down Expand Up @@ -108,6 +109,18 @@ def _convert(self, morph):
def _tokenize(self, text):
    """Run the wrapped analyzer's ``pos`` on ``text`` with ``stem=True``.

    The result of ``self._mdl.pos`` is returned unchanged.
    """
    tagger = self._mdl
    tagged = tagger.pos(text, stem=True)
    return tagged

class KhaiiiModel(Model):
    """Benchmark adapter around the khaiii morphological analyzer."""

    def __init__(self):
        # Imported lazily so khaiii is only required when this model is used.
        from khaiii import KhaiiiApi
        self._mdl = KhaiiiApi()
        print("Initialize khaiii ({})".format(self._mdl.version()), file=sys.stderr)

    def _convert(self, morph):
        """Turn one ``(form, tag)`` pair into ``(form, coarse_tag)``.

        ``morph`` is a 2-tuple as produced by ``_tokenize`` (presumably the
        base class feeds each ``_tokenize`` item through this method --
        confirm against ``Model``). Tuples have no ``.form``/``.tag``
        attributes, so the pair is unpacked instead of accessed by attribute.

        The tag is shortened to its first two characters when it starts
        with ``'V'`` and to its first character otherwise.
        """
        form, tag = morph
        coarse_tag = tag[:2] if tag.startswith('V') else tag[:1]
        return form, coarse_tag

    def _tokenize(self, text):
        """Analyze ``text`` and return a flat list of ``(lex, tag)`` tuples.

        Every morpheme of every word returned by ``self._mdl.analyze`` is
        included, in order.
        """
        return [
            (morph.lex, morph.tag)
            for word in self._mdl.analyze(text)
            for morph in word.morphs
        ]

def load_dataset(path):
ret = []
for line in open(path, encoding='utf-8'):
Expand Down Expand Up @@ -165,7 +178,7 @@ def main(args):

# Command-line options for the benchmark script.
# '--target' is registered exactly once: adding the same option string twice
# makes argparse raise ArgumentError (conflicting option string).
parser = argparse.ArgumentParser()
parser.add_argument('datasets', nargs='+')
parser.add_argument('--target', default='kiwi', help='kiwi,komoran,mecab,kkma,hannanum,okt,khaiii')
parser.add_argument('--error_output_dir')
parser.add_argument('--print_all_results', default=False, action='store_true')
parser.add_argument('--kiwi_model_path')
Expand Down

0 comments on commit e8855b9

Please sign in to comment.