Skip to content

Commit

Permalink
Patch on multi table combiner and test case (#89)
Browse files Browse the repository at this point in the history
* Patch on multi table combiner and test case

* More on comments
  • Loading branch information
Wh1isper committed Dec 27, 2023
1 parent 9b4c683 commit 94b1176
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 27 deletions.
13 changes: 9 additions & 4 deletions sdgx/data_models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ class Metadata(BaseModel):
column_list(list[str]): list of the column names in the table, other columns lists are used to store column information.
"""

# for primary key
# compatible with single primary key or composite primary key
primary_keys: List[str] = []
"""
primary_keys is used to store single primary key or composite primary key
"""

# variables related to columns
# column_list is used to store all columns' name
column_list: List[str] = []
"""
column_list is used to store the names of all columns
"""

# other columns lists are used to store column information
# here are 5 basic data types
Expand All @@ -46,6 +48,9 @@ class Metadata(BaseModel):
# version info
metadata_version: str = "1.0"
_extend: Dict[str, Any] = {}
"""
For extend information, use ``get`` and ``set``
"""

def get(self, key: str, default=None) -> Any:
    """Look up ``key`` on the metadata, falling back to the extend store.

    Args:
        key: Name of a regular attribute, or of an entry kept in ``_extend``.
        default: Value returned when ``key`` is found in neither place.

    Returns:
        The attribute named ``key`` if it exists on the instance; otherwise
        the value stored under ``key`` in ``_extend``; otherwise ``default``.
    """
    # ``_extend`` is a plain dict, so its entries must be read with
    # ``dict.get``. ``getattr`` on a dict only resolves dict methods/attributes
    # (e.g. "keys") and never the stored keys, so extended values were lost.
    return getattr(self, key, self._extend.get(key, default))
Expand Down
34 changes: 16 additions & 18 deletions sdgx/data_models/multi_table_combiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class MultiTableCombiner(BaseModel):
metadata_version: str = "1.0"

metadata_dict: Dict[str, Any] = {}
relationships: List[Any] = []
relationships: List[Relationship] = []

def check(self):
"""Do necessary checks:
Expand All @@ -35,26 +35,24 @@ def check(self):
if metadata_cnt != relationship_cnt + 1:
raise MultiTableCombinerError("Number of tables should corresponds to relationships.")

# table name check
table_names_from_relationships = set()
table_names = set(self.metadata_dict.keys())
relationship_parents = set(r.parent_table for r in self.relationships)
relationship_children = set(r.child_table for r in self.relationships)

# each relationship's table must have metadata
table_names = list(self.metadata_dict.keys())
for each_r in self.relationships:
if each_r.parent_table not in table_names:
raise MultiTableCombinerError(
f"Metadata of parent table {each_r.parent_table} is missing."
)
if each_r.child_table not in table_names:
raise MultiTableCombinerError(
f"Metadata of child table {each_r.child_table} is missing."
)
table_names_from_relationships.add(each_r.parent_table)
table_names_from_relationships.add(each_r.child_table)
if not table_names.issuperset(relationship_parents):
raise MultiTableCombinerError(
f"Relationships' parent table {relationship_parents - table_names} is missing."
)
if not table_names.issuperset(relationship_children):
raise MultiTableCombinerError(
f"Relationships' child table {relationship_children - table_names} is missing."
)

# each table in metadata must be in a relationship
for each_t in table_names:
if each_t not in table_names_from_relationships:
raise MultiTableCombinerError(f"Table {each_t} has not relationship.")
# Every table that has metadata must appear in at least one relationship,
# either as a parent or as a child. Sets are combined with ``|`` (union);
# ``set + set`` is not defined and raises TypeError.
if not (relationship_parents | relationship_children).issuperset(table_names):
    raise MultiTableCombinerError(
        f"Table {table_names - (relationship_parents | relationship_children)} is missing in relationships."
    )

logger.info("MultiTableCombiner check finished.")
4 changes: 2 additions & 2 deletions sdgx/data_models/relationship.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel

from sdgx.exceptions import RelationshipError
from sdgx.exceptions import RelationshipInitError


class Relationship(BaseModel):
Expand All @@ -26,4 +26,4 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)

if self.parent_table == self.child_table:
raise RelationshipError("child table and parent table cannot be the same")
raise RelationshipInitError("child table and parent table cannot be the same")
2 changes: 1 addition & 1 deletion sdgx/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class MetadataInvalidError(DataModelError):
ERROR_CODE = 9002


class RelationshipError(DataModelError):
class RelationshipInitError(DataModelError):
ERROR_CODE = 9003


Expand Down
5 changes: 3 additions & 2 deletions tests/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ def test_metadata(metadata: Metadata):
assert metadata.datetime_columns == metadata.get("datetime_columns")
assert metadata.bool_columns == metadata.get("bool_columns")
assert metadata.numeric_columns == metadata.get("numeric_columns")
assert metadata.set("a", 1) == metadata.get("a")
assert metadata.model_dump_json()


def test_metadata_save_load(metadata: Metadata):
test_path = Path("metadata_path_test.json")
def test_metadata_save_load(metadata: Metadata, tmp_path: Path):
test_path = tmp_path / "metadata_path_test.json"
metadata.save(test_path)
# load from path
new_meatadata = Metadata.load(test_path)
Expand Down

0 comments on commit 94b1176

Please sign in to comment.