Skip to content

Commit

Permalink
Add Feature and FeatureInventory classes.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 585142666
  • Loading branch information
isingoo authored and copybara-github committed Nov 27, 2023
1 parent af0b07c commit 6a0274e
Show file tree
Hide file tree
Showing 15 changed files with 662 additions and 125 deletions.
103 changes: 103 additions & 0 deletions nisaba/scripts/natural_translit/features/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Package-wide defaults for every target declared in this BUILD file.
package(
# NOTE(review): the applicable-licenses list is empty here — confirm that
# this is intended.
default_applicable_licenses = [
],
# Targets are visible to the natural_translit subpackages and to the
# //nlp/sweet/saline subpackages unless a target overrides its visibility.
default_visibility = [
"//nisaba/scripts/natural_translit:__subpackages__",
"//nlp/sweet/saline:__subpackages__",
],
)

# License type declared for this package.
licenses(["notice"])

# Library target for feature2.py; depends on the inventory2, log_op and
# type_op utilities.
py_library(
name = "feature2",
srcs = ["feature2.py"],
deps = [
"//nisaba/scripts/natural_translit/utils:inventory2",
"//nisaba/scripts/natural_translit/utils:log_op",
"//nisaba/scripts/natural_translit/utils:type_op",
],
)

# Unit tests for feature2.py, run with absltest.
py_test(
name = "feature2_test",
srcs = ["feature2_test.py"],
main = "feature2_test.py",
deps = [
":feature2",
"//nisaba/scripts/natural_translit/utils:type_op",
"@io_abseil_py//absl/testing:absltest",
],
)

# Library target for orthographic.py; built on :feature2 and the list_op
# utility.
py_library(
name = "orthographic",
srcs = ["orthographic.py"],
deps = [
":feature2",
"//nisaba/scripts/natural_translit/utils:list_op",
],
)

# Unit tests for orthographic.py, run with absltest.
py_test(
name = "orthographic_test",
srcs = ["orthographic_test.py"],
main = "orthographic_test.py",
deps = [
":orthographic",
"@io_abseil_py//absl/testing:absltest",
],
)

# Library target for phonological.py; built on :feature2 and the list_op
# utility.
py_library(
name = "phonological",
srcs = ["phonological.py"],
deps = [
":feature2",
"//nisaba/scripts/natural_translit/utils:list_op",
],
)

# Unit tests for phonological.py, run with absltest.
py_test(
name = "phonological_test",
srcs = ["phonological_test.py"],
main = "phonological_test.py",
deps = [
":phonological",
"@io_abseil_py//absl/testing:absltest",
],
)

# Library target for qualifier.py; built on :feature2 and the list_op
# utility.
py_library(
name = "qualifier",
srcs = ["qualifier.py"],
deps = [
":feature2",
"//nisaba/scripts/natural_translit/utils:list_op",
],
)

# Unit tests for qualifier.py, run with absltest.
py_test(
name = "qualifier_test",
srcs = ["qualifier_test.py"],
main = "qualifier_test.py",
deps = [
":qualifier",
"@io_abseil_py//absl/testing:absltest",
],
)
110 changes: 110 additions & 0 deletions nisaba/scripts/natural_translit/features/feature2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Feature, FeatureSet and FeatureInventory classes."""

from typing import Union
from nisaba.scripts.natural_translit.utils import inventory2
from nisaba.scripts.natural_translit.utils import log_op as log
from nisaba.scripts.natural_translit.utils import type_op as ty


class Feature(ty.Thing):
  """A single named feature that belongs to a category.

  Attributes:
    text: Same string as the alias the feature was created with.
    category: Name of the category this feature belongs to.
    group: Name of the group within the category, or `ty.UNASSIGNED` when no
      (or an empty) group name was given.
  """

  def __init__(self, alias: str, category: str, group: str = ''):
    """Initializes the feature.

    Args:
      alias: Alias of the feature; also stored as its text.
      category: Category name.
      group: Optional group name; an empty value maps to `ty.UNASSIGNED`.
    """
    super().__init__()
    self.set_alias(alias)
    self.text = alias
    # An empty group name means that the feature has no group.
    self.group = group or ty.UNASSIGNED
    self.category = category


class FeatureSet:
  """A mutable collection of unique `Feature` objects.

  The constructor, `add` and `remove` accept any mix of single features and
  other feature sets. Feature sets are flattened into their member features.
  Arguments of any other type (e.g. `ty.Nothing` values) are silently ignored.

  Attributes:
    UNION: Type alias for the arguments accepted by this class.
  """

  UNION = Union[Feature, 'FeatureSet', ty.Nothing]

  def __init__(
      self,
      *items: UNION
  ):
    """Initializes the set with the features found in items.

    Args:
      *items: Features and/or feature sets that make up the initial content.
    """
    super().__init__()
    self._items = set()
    self.add(*items)

  def __iter__(self):
    """Iterates over the member features in no particular order."""
    return iter(self._items)

  def __len__(self):
    """Returns the number of member features."""
    return len(self._items)

  def __str__(self):
    """Returns the same string as `str()`."""
    return self.str()

  def _set(self, arg: UNION) -> set[Feature]:
    """Converts one argument to a plain set of features.

    Args:
      arg: A feature, a feature set, or any other value.

    Returns:
      A one-element set for a feature, a copy of the members for a feature
      set, and an empty set for anything else.
    """
    if isinstance(arg, Feature):
      return {arg}
    if isinstance(arg, FeatureSet):
      return set(arg)
    return set()

  def _flat_set(self, *args: UNION) -> set[Feature]:
    """Returns the union of the features found in all args."""
    flat = set()
    for arg in args:
      flat.update(self._set(arg))
    return flat

  def str(self):
    """Returns the member texts formatted as '(a, b, ...)'.

    The texts are sorted so that the result, and therefore the debug log
    lines written by `add` and `remove`, does not depend on the iteration
    order of the underlying set.
    """
    return '(%s)' % ', '.join(sorted(f.text for f in self._items))

  def add(self, *args: UNION) -> None:
    """Adds the features found in args and logs the change at debug level.

    Args:
      *args: Features and/or feature sets to add. Other values are ignored.
    """
    old = self.str()
    self._items.update(self._flat_set(*args))
    log.dbg_message('(%s) to %s: %s' % (
        ', '.join(log.class_and_text(arg) for arg in args),
        old, self.str()
    ))

  def remove(self, *args: UNION) -> None:
    """Removes the features found in args and logs the change at debug level.

    Features that are not members of this set are skipped without an error.

    Args:
      *args: Features and/or feature sets to remove. Other values are ignored.
    """
    old = self.str()
    self._items.difference_update(self._flat_set(*args))
    log.dbg_message('(%s) to %s: %s' % (
        ', '.join(log.class_and_text(arg) for arg in args),
        old, self.str()
    ))


class FeatureInventory(inventory2.Inventory):
  """Inventory whose features all share a single category.

  Attributes:
    category: Category name given to every feature created by this inventory.
    group_aliases: Names of the groups made with `make_group`, in call order.
  """

  def __init__(self, category: str):
    """Initializes an empty inventory for the given category."""
    super().__init__()
    self.group_aliases = []
    self.category = category

  def add_feature(self, alias: str) -> None:
    """Creates a feature without a group and adds it as an item."""
    feature = Feature(alias, self.category)
    self.add_item(feature)

  def make_group(self, group: str, aliases: list[str]) -> None:
    """Creates one feature per alias and stores them under the group name.

    Args:
      group: Name of the group; it is passed to every created feature, used
        as the alias of the supplement and recorded in `group_aliases`.
      aliases: Aliases of the features to create.
    """
    candidates = (Feature(alias, self.category, group) for alias in aliases)
    # Only features for which add_item returns a truthy value join the group.
    members = [feature for feature in candidates if self.add_item(feature)]
    self.make_supp(group, members)
    self.group_aliases.append(group)

  def add_feature_set(
      self, set_alias: str, *features: FeatureSet.UNION
  ) -> None:
    """Builds a FeatureSet from features and stores it under set_alias."""
    feature_set = FeatureSet(*features)
    self.make_supp(set_alias, feature_set)
59 changes: 59 additions & 0 deletions nisaba/scripts/natural_translit/features/feature2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import absltest
from nisaba.scripts.natural_translit.features import feature2 as f
from nisaba.scripts.natural_translit.utils import type_op as ty

# Standalone features, each with its own category and no group argument.
# _f4 is not referenced by the tests visible in this file.
_f0 = f.Feature('f0', 'c0')
_f1 = f.Feature('f1', 'c1')
_f2 = f.Feature('f2', 'c2')
_f3 = f.Feature('f3', 'c3')
_f4 = f.Feature('f4', 'c4')
# Feature set created without arguments.
_fs0 = f.FeatureSet()
# Feature set created from two features.
_fs1 = f.FeatureSet(_f1, _f2)
# Feature set created from a non-feature argument; the tests expect it to be
# empty.
_fs2 = f.FeatureSet(ty.UNSPECIFIED)
# Feature set created from another set plus a feature; the tests expect the
# members of _fs1 together with _f3.
_fs3 = f.FeatureSet(_fs1, _f3)
# Inventory shared by the tests; test_feature_inventory_group adds a group
# to it.
_test = f.FeatureInventory('test')


class Feature2Test(absltest.TestCase):
  """Tests for the Feature, FeatureSet and FeatureInventory classes."""

  def test_feature(self):
    """A feature keeps its alias and category and has no group by default."""
    feature = _f0
    self.assertEqual(feature.alias, 'f0')
    self.assertEqual(feature.category, 'c0')
    self.assertEqual(feature.group, ty.UNASSIGNED)

  def test_feature_set_empty(self):
    """A set created without arguments is empty and prints as '()'."""
    empty = _fs0
    self.assertEmpty(empty)
    self.assertEqual(empty.str(), '()')

  def test_feature_set_items(self):
    """Every feature passed to the constructor is a member of the set."""
    for member in (_f1, _f2):
      self.assertIn(member, _fs1)

  def test_feature_set_nothing(self):
    """A non-feature argument adds nothing to the set."""
    self.assertEmpty(_fs2)

  def test_feature_set_feature_set(self):
    """A feature set argument contributes its member features."""
    expected = {_f1, _f2, _f3}
    self.assertEqual(_fs3._items, expected)

  def test_feature_inventory_group(self):
    """make_group creates grouped features reachable through the inventory."""
    _test.make_group('g', ['gf1', 'gf2'])
    first, second = _test.gf1, _test.gf2
    self.assertEqual(first.group, 'g')
    self.assertEqual(_test.g, [first, second])


if __name__ == '__main__':
  absltest.main()
43 changes: 43 additions & 0 deletions nisaba/scripts/natural_translit/features/orthographic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Orthographic features."""

from nisaba.scripts.natural_translit.features import feature2
from nisaba.scripts.natural_translit.utils import list_op as ls


def _script() -> feature2.FeatureInventory:
  """Builds the inventory of script features.

  Returns:
    A 'script' category inventory that holds the ungrouped feature 'test'
    and the groups listed below.
  """
  inventory = feature2.FeatureInventory('script')
  inventory.add_feature('test')
  # Each entry is the argument list of one make_group call: [group, aliases].
  groups = [
      ['latin', ['basic']],
  ]
  ls.apply_foreach(inventory.make_group, groups)
  return inventory

script = _script()


def _grapheme() -> feature2.FeatureInventory:
  """Builds the inventory of grapheme features.

  Returns:
    An 'orthographic' category inventory that holds the groups listed below.
  """
  inventory = feature2.FeatureInventory('orthographic')
  # Each entry is the argument list of one make_group call: [group, aliases].
  groups = [
      ['case', ['lower', 'upper']],
      ['texttype', ['raw', 'ctrl']],
      ['dependence', ['standalone', 'combining']],
  ]
  ls.apply_foreach(inventory.make_group, groups)
  return inventory

grapheme = _grapheme()
31 changes: 31 additions & 0 deletions nisaba/scripts/natural_translit/features/orthographic_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright 2023 Nisaba Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from absl.testing import absltest
from nisaba.scripts.natural_translit.features import orthographic

# Short aliases for the module-level inventories under test.
_gr = orthographic.grapheme
_sc = orthographic.script


class OrthographicTest(absltest.TestCase):
  """Tests for the orthographic feature inventories."""

  def test_script(self):
    """The 'basic' feature is a member of the 'latin' group."""
    latin_group = _sc.latin
    self.assertIn(_sc.basic, latin_group)

  def test_case(self):
    """The 'case' group holds the lower and upper features in that order."""
    expected = [_gr.lower, _gr.upper]
    self.assertEqual(_gr.case, expected)

if __name__ == '__main__':
  absltest.main()
Loading

0 comments on commit 6a0274e

Please sign in to comment.