From 973ea4c840a6b98145c34de7e37336a5dcda9144 Mon Sep 17 00:00:00 2001 From: sjh <171846802@qq.com> Date: Sat, 6 Jan 2024 17:32:13 +0800 Subject: [PATCH 01/11] test_jsd --- tests/metrics/test_jsd.py | 53 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 tests/metrics/test_jsd.py diff --git a/tests/metrics/test_jsd.py b/tests/metrics/test_jsd.py new file mode 100644 index 00000000..fb595a43 --- /dev/null +++ b/tests/metrics/test_jsd.py @@ -0,0 +1,53 @@ +import numpy as np +import pandas as pd +import pytest +from sdgx.metrics.column.jsd import JSD + +# 创建测试数据 +real_data_discrete = pd.DataFrame({ + 'col1': ['a', 'b', 'c', 'd', 'e'], + 'col2': ['a', 'b', 'b', 'b', 'e'], +}) + +synthetic_data_discrete = pd.DataFrame({ + 'col1': ['a', 'c', 'd', 'b', 'b'], + 'col2': ['a', 'c', 'a', 'a', 'e'], +}) + +real_data_cotinuous = pd.DataFrame({ + 'col1': [1, 1, 2, 2, 3], + 'col2': [4, 4, 5, 5, 6], +}) + +synthetic_data_cotinuous = pd.DataFrame({ + 'col1': [1, 2, 2, 3, 3], + 'col2': [4, 5, 5, 6, 6], +}) + +# 创建 JSD 实例 +jsd_instance = JSD() + + +def test_jsd_discrete(): + cols = ['col1', 'col2'] + result = jsd_instance.calculate(real_data_discrete, synthetic_data_discrete, cols, discrete=True) + result1 = jsd_instance.calculate(real_data_discrete, real_data_discrete, cols, discrete=True) + result2 = jsd_instance.calculate(synthetic_data_discrete, real_data_discrete, cols, discrete=True) + + assert result >= 0 + assert result <= 1 + assert result1 == 0 + assert result2 == result + + + +def test_jsd_continuous(): + cols = ['col1'] + result = jsd_instance.calculate(real_data_cotinuous, synthetic_data_cotinuous, cols, discrete=False) + result1 = jsd_instance.calculate(real_data_cotinuous, real_data_cotinuous, cols, discrete=False) + result2 = jsd_instance.calculate(synthetic_data_cotinuous, real_data_cotinuous, cols, discrete=False) + + assert result >= 0 + assert result <= 1 + assert result1 == 0 + assert result2 == result \ No newline at end of file From d310f50f9e1f17fbeb6122cb844c7f4cfca0887d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 6 Jan 2024 09:40:11 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/metrics/test_jsd.py | 70 ++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/tests/metrics/test_jsd.py b/tests/metrics/test_jsd.py index fb595a43..110dc37e 100644 --- a/tests/metrics/test_jsd.py +++ b/tests/metrics/test_jsd.py @@ -1,38 +1,51 @@ import numpy as np import pandas as pd import pytest + from sdgx.metrics.column.jsd import JSD # 创建测试数据 -real_data_discrete = pd.DataFrame({ - 'col1': ['a', 'b', 'c', 'd', 'e'], - 'col2': ['a', 'b', 'b', 'b', 'e'], -}) - -synthetic_data_discrete = pd.DataFrame({ - 'col1': ['a', 'c', 'd', 'b', 'b'], - 'col2': ['a', 'c', 'a', 'a', 'e'], -}) - -real_data_cotinuous = pd.DataFrame({ - 'col1': [1, 1, 2, 2, 3], - 'col2': [4, 4, 5, 5, 6], -}) - -synthetic_data_cotinuous = pd.DataFrame({ - 'col1': [1, 2, 2, 3, 3], - 'col2': [4, 5, 5, 6, 6], -}) +real_data_discrete = pd.DataFrame( + { + "col1": ["a", "b", "c", "d", "e"], + "col2": ["a", "b", "b", "b", "e"], + } +) + +synthetic_data_discrete = pd.DataFrame( + { + "col1": ["a", "c", "d", "b", "b"], + "col2": ["a", "c", "a", "a", "e"], + } +) + +real_data_cotinuous = pd.DataFrame( + { + "col1": [1, 1, 2, 2, 3], + "col2": [4, 4, 5, 5, 6], + } +) + +synthetic_data_cotinuous = pd.DataFrame( + { + "col1": [1, 2, 2, 3, 3], + "col2": [4, 5, 5, 6, 6], + } +) # 创建 JSD 实例 jsd_instance = JSD() def test_jsd_discrete(): - cols = ['col1', 'col2'] - result = jsd_instance.calculate(real_data_discrete, synthetic_data_discrete, cols, discrete=True) + cols = ["col1", "col2"] + result = jsd_instance.calculate( + real_data_discrete, synthetic_data_discrete, cols, discrete=True + ) result1 = jsd_instance.calculate(real_data_discrete, real_data_discrete, cols, discrete=True) - result2 = jsd_instance.calculate(synthetic_data_discrete, real_data_discrete, cols, discrete=True) + result2 = jsd_instance.calculate( + synthetic_data_discrete, real_data_discrete, cols, discrete=True + ) assert result >= 0 assert result <= 1 @@ -40,14 +53,17 @@ def test_jsd_discrete(): assert result2 == result - def test_jsd_continuous(): - cols = ['col1'] - result = jsd_instance.calculate(real_data_cotinuous, synthetic_data_cotinuous, cols, discrete=False) + cols = ["col1"] + result = jsd_instance.calculate( + real_data_cotinuous, synthetic_data_cotinuous, cols, discrete=False + ) result1 = jsd_instance.calculate(real_data_cotinuous, real_data_cotinuous, cols, discrete=False) - result2 = jsd_instance.calculate(synthetic_data_cotinuous, real_data_cotinuous, cols, discrete=False) + result2 = jsd_instance.calculate( + synthetic_data_cotinuous, real_data_cotinuous, cols, discrete=False + ) assert result >= 0 assert result <= 1 assert result1 == 0 - assert result2 == result \ No newline at end of file + assert result2 == result From 931f6ac233cc9d58adaeaa8fa182e172045c2350 Mon Sep 17 00:00:00 2001 From: sjh <171846802@qq.com> Date: Sun, 14 Jan 2024 17:08:48 +0800 Subject: [PATCH 03/11] using the fixture --- tests/metrics/test_jsd.py | 83 +++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 48 deletions(-) diff --git a/tests/metrics/test_jsd.py b/tests/metrics/test_jsd.py index 110dc37e..1a821bcb 100644 --- a/tests/metrics/test_jsd.py +++ b/tests/metrics/test_jsd.py @@ -1,51 +1,40 @@ +from __future__ import annotations +import random + import numpy as np import pandas as pd import pytest - from sdgx.metrics.column.jsd import JSD -# 创建测试数据 -real_data_discrete = pd.DataFrame( - { - "col1": ["a", "b", "c", "d", "e"], - "col2": ["a", "b", "b", "b", "e"], - } -) -synthetic_data_discrete = pd.DataFrame( - { - "col1": ["a", "c", "d", "b", "b"], - "col2": ["a", "c", "a", "a", "e"], - } -) +# 创建测试数据 +@pytest.fixture +def dummy_data(dummy_single_table_path): + yield pd.read_csv(dummy_single_table_path) + + +@pytest.fixture +def test_data(): + role_set = ["admin", "user", "guest"] + df = pd.DataFrame( + { + "role": [random.choice(role_set) for _ in range(10)], + "feature_x": [random.random() for _ in range(10)] + } + ) + return df -real_data_cotinuous = pd.DataFrame( - { - "col1": [1, 1, 2, 2, 3], - "col2": [4, 4, 5, 5, 6], - } -) -synthetic_data_cotinuous = pd.DataFrame( - { - "col1": [1, 2, 2, 3, 3], - "col2": [4, 5, 5, 6, 6], - } -) +@pytest.fixture +def jsd_instance(): + return JSD() -# 创建 JSD 实例 -jsd_instance = JSD() - -def test_jsd_discrete(): - cols = ["col1", "col2"] - result = jsd_instance.calculate( - real_data_discrete, synthetic_data_discrete, cols, discrete=True - ) - result1 = jsd_instance.calculate(real_data_discrete, real_data_discrete, cols, discrete=True) - result2 = jsd_instance.calculate( - synthetic_data_discrete, real_data_discrete, cols, discrete=True - ) +def test_jsd_discrete(dummy_data, test_data, jsd_instance): + cols = ['role'] + result = jsd_instance.calculate(dummy_data, test_data, cols, discrete=True) + result1 = jsd_instance.calculate(dummy_data, dummy_data, cols, discrete=True) + result2 = jsd_instance.calculate(test_data, dummy_data, cols, discrete=True) assert result >= 0 assert result <= 1 @@ -53,17 +42,15 @@ def test_jsd_discrete(): assert result2 == result -def test_jsd_continuous(): - cols = ["col1"] - result = jsd_instance.calculate( - real_data_cotinuous, synthetic_data_cotinuous, cols, discrete=False - ) - result1 = jsd_instance.calculate(real_data_cotinuous, real_data_cotinuous, cols, discrete=False) - result2 = jsd_instance.calculate( - synthetic_data_cotinuous, real_data_cotinuous, cols, discrete=False - ) +def test_jsd_continuous(dummy_data,test_data, jsd_instance): + cols = ["feature_x"] + result = jsd_instance.calculate(dummy_data, test_data, cols, discrete=False) + result1 = jsd_instance.calculate(dummy_data, dummy_data, cols, discrete=False) assert result >= 0 assert result <= 1 assert result1 == 0 - assert result2 == result + + +if __name__ == "__main__": + pytest.main(["-vv", "-s", __file__]) From dfc489e1cdbc7073af14c80427c7632effd3a927 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 14 Jan 2024 09:09:13 +0000 Subject: [PATCH 04/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/metrics/test_jsd.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/metrics/test_jsd.py b/tests/metrics/test_jsd.py index 1a821bcb..a6040875 100644 --- a/tests/metrics/test_jsd.py +++ b/tests/metrics/test_jsd.py @@ -1,9 +1,11 @@ from __future__ import annotations + import random import numpy as np import pandas as pd import pytest + from sdgx.metrics.column.jsd import JSD @@ -19,7 +21,7 @@ def test_data(): df = pd.DataFrame( { "role": [random.choice(role_set) for _ in range(10)], - "feature_x": [random.random() for _ in range(10)] + "feature_x": [random.random() for _ in range(10)], } ) return df @@ -31,7 +33,7 @@ def jsd_instance(): def test_jsd_discrete(dummy_data, test_data, jsd_instance): - cols = ['role'] + cols = ["role"] result = jsd_instance.calculate(dummy_data, test_data, cols, discrete=True) result1 = jsd_instance.calculate(dummy_data, dummy_data, cols, discrete=True) result2 = jsd_instance.calculate(test_data, dummy_data, cols, discrete=True) @@ -42,7 +44,7 @@ def test_jsd_discrete(dummy_data, test_data, jsd_instance): assert result2 == result -def test_jsd_continuous(dummy_data,test_data, jsd_instance): +def test_jsd_continuous(dummy_data, test_data, jsd_instance): cols = ["feature_x"] result = jsd_instance.calculate(dummy_data, test_data, cols, discrete=False) result1 = jsd_instance.calculate(dummy_data, dummy_data, cols, discrete=False) From 6c4d21f2cde9d8c885254de0b7e71b022ded202c Mon Sep 17 00:00:00 2001 From: sjh <171846802@qq.com> Date: Fri, 22 Mar 2024 20:46:53 +0800 Subject: [PATCH 05/11] metadata update --- sdgx/data_models/combiner.py | 1 + sdgx/data_models/metadata.py | 178 ++++++++++++++++++++++++++++++++++- 2 files changed, 177 insertions(+), 2 deletions(-) diff --git a/sdgx/data_models/combiner.py b/sdgx/data_models/combiner.py index 2a1c14e4..6a6ed808 100644 --- a/sdgx/data_models/combiner.py +++ b/sdgx/data_models/combiner.py @@ -35,6 +35,7 @@ class MetadataCombiner(BaseModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + self.relationships = [] def check(self): """Do necessary checks: diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index 263adc23..4e700f88 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -1,14 +1,16 @@ from __future__ import annotations import json +import warnings from collections import defaultdict from collections.abc import Iterable +from copy import deepcopy from itertools import chain from pathlib import Path -from typing import Any, Dict, Set +from typing import Any, Dict, Set, List import pandas as pd -from pydantic import BaseModel +from pydantic import BaseModel, Field from sdgx.data_loader import DataLoader from sdgx.data_models.inspectors.base import RelationshipInspector @@ -34,6 +36,25 @@ class Metadata(BaseModel): column_list(list[str]): list of the comlumn name in the table, other columns lists are used to store column information. """ + _SDTYPE_KWARGS = { + 'numerical': frozenset(['computer_representation']), + 'datetime': frozenset(['datetime_format']), + 'categorical': frozenset(['order', 'order_by']), + 'boolean': frozenset([]), + 'id': frozenset(['regex_format']), + 'unknown': frozenset(['pii']), + } + + _KEYS = frozenset([ + 'columns', + 'primary_key', + 'alternate_keys', + 'sequence_key', + 'sequence_index', + 'column_relationships', + 'METADATA_SPEC_VERSION' + ]) + primary_keys: Set[str] = set() """ primary_keys is used to store single primary key or composite primary key @@ -70,6 +91,159 @@ class Metadata(BaseModel): """ For extend information, use ``get`` and ``set`` """ + columns: Dict = defaultdict(str) + primary_key: str = None + alternate_keys: List[str] = Field(default_factory=list, optional=True) + + def __init__(self, **data: Any): + super().__init__(**data) + self.columns = {} + self.primary_key = None + self.alternate_keys = [] + + # ---------------------------------------------------------------------- + def add_column(self, column_name, **kwargs): + """Add a column to the ``SingleTableMetadata``. + + Args: + column_name (str): + The column name to be added. + kwargs (type): + Any additional key word arguments for the column, where ``sdtype`` is required. + + Raises: + - ``InvalidMetadataError`` if the column already exists. + - ``InvalidMetadataError`` if the ``kwargs`` do not contain ``sdtype``. + - ``InvalidMetadataError`` if the column has unexpected values or ``kwargs`` for the + given ``sdtype``. + - ``InvalidMetadataError`` if the ``pii`` value is not ``True`` or ``False`` when + present. + """ + if column_name in self.columns: + raise Exception( + f"Column name '{column_name}' already exists. Use 'update_column' " + 'to update an existing column.' + ) + + sdtype = kwargs.get('sdtype') + if sdtype is None: + raise Exception(f"Please provide a 'sdtype' for column '{column_name}'.") + + column_kwargs = deepcopy(kwargs) + if sdtype not in self._SDTYPE_KWARGS: + pii = column_kwargs.get('pii', True) + column_kwargs['pii'] = pii + + self.columns[column_name] = column_kwargs + + def update_column(self, column_name, **kwargs): + """Update an existing column in the ``SingleTableMetadata``. + + Args: + column_name (str): + The column name to be updated. + **kwargs (type): + Any key word arguments that describe metadata for the column. + + Raises: + - ``InvalidMetadataError`` if the column doesn't already exist in the + ``SingleTableMetadata``. + - ``InvalidMetadataError`` if the column has unexpected values or ``kwargs`` for the + current + ``sdtype``. + - ``InvalidMetadataError`` if the ``pii`` value is not ``True`` or ``False`` when + present. + """ + _kwargs = deepcopy(kwargs) + if 'sdtype' in kwargs: + sdtype = kwargs.pop('sdtype') + else: + sdtype = self.columns[column_name]['sdtype'] + _kwargs['sdtype'] = sdtype + + self.columns[column_name] = _kwargs + + def set_primary_key(self, column_name): + """Set the metadata primary key. + + Args: + column_name (str): + Name of the primary key column(s). + """ + if column_name in self.alternate_keys: + warnings.warn( + f"'{column_name}' is currently set as an alternate key and will be removed from " + 'that list.' + ) + self.alternate_keys.remove(column_name) + + if self.primary_key is not None: + warnings.warn( + f"There is an existing primary key '{self.primary_key}'." + ' This key will be removed.' + ) + + self.primary_key = column_name + + def remove_primary_key(self): + """Remove the metadata primary key.""" + if self.primary_key is None: + warnings.warn('No primary key exists to remove.') + + self.primary_key = None + + def add_column_relationship(self, relationship_type, column_names): + """Add a column relationship to the metadata. + + Args: + relationship_type (str): + Type of column relationship. + column_names (list[str]): + List of column names in the relationship. + """ + relationship = {'type': relationship_type, 'column_names': column_names} + to_check = [relationship] + self.column_relationships + self.column_relationships.append(relationship) + + def _get_primary_and_alternate_keys(self): + """Get set of primary and alternate keys. + + Returns: + set: + Set of keys. + """ + keys = set(self.alternate_keys) + if self.primary_key: + keys.update({self.primary_key}) + + return keys + + @staticmethod + def _get_invalid_column_values(column, validation_function): + valid = column.apply(validation_function).astype(bool) + + return set(column[~valid]) + + @classmethod + def load_from_dict(cls, metadata_dict): + """Create a ``SingleTableMetadata`` instance from a python ``dict``. + + Args: + metadata_dict (dict): + Python dictionary representing a ``SingleTableMetadata`` object. + + Returns: + Instance of ``SingleTableMetadata``. + """ + instance = cls() + for key in instance._KEYS: + value = deepcopy(metadata_dict.get(key)) + if value: + setattr(instance, f'{key}', value) + + return instance + + # ---------------------------------------------------------------------- @property def tag_fields(self) -> Iterable[str]: From 7a300c8de2354a8729a11d7d1832f95b3e0348da Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 22 Mar 2024 12:50:33 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sdgx/data_models/metadata.py | 60 +++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index 4e700f88..83f05470 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -7,7 +7,7 @@ from copy import deepcopy from itertools import chain from pathlib import Path -from typing import Any, Dict, Set, List +from typing import Any, Dict, List, Set import pandas as pd from pydantic import BaseModel, Field @@ -37,23 +37,25 @@ class Metadata(BaseModel): """ _SDTYPE_KWARGS = { - 'numerical': frozenset(['computer_representation']), - 'datetime': frozenset(['datetime_format']), - 'categorical': frozenset(['order', 'order_by']), - 'boolean': frozenset([]), - 'id': frozenset(['regex_format']), - 'unknown': frozenset(['pii']), + "numerical": frozenset(["computer_representation"]), + "datetime": frozenset(["datetime_format"]), + "categorical": frozenset(["order", "order_by"]), + "boolean": frozenset([]), + "id": frozenset(["regex_format"]), + "unknown": frozenset(["pii"]), } - _KEYS = frozenset([ - 'columns', - 'primary_key', - 'alternate_keys', - 'sequence_key', - 'sequence_index', - 'column_relationships', - 'METADATA_SPEC_VERSION' - ]) + _KEYS = frozenset( + [ + "columns", + "primary_key", + "alternate_keys", + "sequence_key", + "sequence_index", + "column_relationships", + "METADATA_SPEC_VERSION", + ] + ) primary_keys: Set[str] = set() """ @@ -122,17 +124,17 @@ def add_column(self, column_name, **kwargs): if column_name in self.columns: raise Exception( f"Column name '{column_name}' already exists. Use 'update_column' " - 'to update an existing column.' + "to update an existing column." ) - sdtype = kwargs.get('sdtype') + sdtype = kwargs.get("sdtype") if sdtype is None: raise Exception(f"Please provide a 'sdtype' for column '{column_name}'.") column_kwargs = deepcopy(kwargs) if sdtype not in self._SDTYPE_KWARGS: - pii = column_kwargs.get('pii', True) - column_kwargs['pii'] = pii + pii = column_kwargs.get("pii", True) + column_kwargs["pii"] = pii self.columns[column_name] = column_kwargs @@ -155,11 +157,11 @@ def update_column(self, column_name, **kwargs): present. """ _kwargs = deepcopy(kwargs) - if 'sdtype' in kwargs: - sdtype = kwargs.pop('sdtype') + if "sdtype" in kwargs: + sdtype = kwargs.pop("sdtype") else: - sdtype = self.columns[column_name]['sdtype'] - _kwargs['sdtype'] = sdtype + sdtype = self.columns[column_name]["sdtype"] + _kwargs["sdtype"] = sdtype self.columns[column_name] = _kwargs @@ -173,14 +175,14 @@ def set_primary_key(self, column_name): if column_name in self.alternate_keys: warnings.warn( f"'{column_name}' is currently set as an alternate key and will be removed from " - 'that list.' + "that list." ) self.alternate_keys.remove(column_name) if self.primary_key is not None: warnings.warn( f"There is an existing primary key '{self.primary_key}'." - ' This key will be removed.' + " This key will be removed." ) self.primary_key = column_name @@ -188,7 +190,7 @@ def set_primary_key(self, column_name): def remove_primary_key(self): """Remove the metadata primary key.""" if self.primary_key is None: - warnings.warn('No primary key exists to remove.') + warnings.warn("No primary key exists to remove.") self.primary_key = None @@ -201,7 +203,7 @@ def add_column_relationship(self, relationship_type, column_names): column_names (list[str]): List of column names in the relationship. """ - relationship = {'type': relationship_type, 'column_names': column_names} + relationship = {"type": relationship_type, "column_names": column_names} to_check = [relationship] + self.column_relationships self.column_relationships.append(relationship) @@ -239,7 +241,7 @@ def load_from_dict(cls, metadata_dict): for key in instance._KEYS: value = deepcopy(metadata_dict.get(key)) if value: - setattr(instance, f'{key}', value) + setattr(instance, f"{key}", value) return instance From c2618d58d37fe9f4014ef958fc94f067980e790f Mon Sep 17 00:00:00 2001 From: sjh <171846802@qq.com> Date: Mon, 1 Apr 2024 14:26:45 +0800 Subject: [PATCH 07/11] update metadata --- sdgx/data_models/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index 4e700f88..9e220e6a 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -92,13 +92,13 @@ class Metadata(BaseModel): For extend information, use ``get`` and ``set`` """ columns: Dict = defaultdict(str) - primary_key: str = None + primary_key: str = 'default' alternate_keys: List[str] = Field(default_factory=list, optional=True) def __init__(self, **data: Any): super().__init__(**data) self.columns = {} - self.primary_key = None + self.primary_key = 'default' self.alternate_keys = [] # ---------------------------------------------------------------------- From 548e44cd40642a0f66e4da487618dcbcef89d173 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 06:30:36 +0000 Subject: [PATCH 08/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sdgx/data_models/metadata.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index fb8a4796..01f73174 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -94,13 +94,13 @@ class Metadata(BaseModel): For extend information, use ``get`` and ``set`` """ columns: Dict = defaultdict(str) - primary_key: str = 'default' + primary_key: str = "default" alternate_keys: List[str] = Field(default_factory=list, optional=True) def __init__(self, **data: Any): super().__init__(**data) self.columns = {} - self.primary_key = 'default' + self.primary_key = "default" self.alternate_keys = [] # ---------------------------------------------------------------------- From a3e184c9b2a0c8961956fcfd6f5927fc45bbf39f Mon Sep 17 00:00:00 2001 From: sjh <171846802@qq.com> Date: Mon, 1 Apr 2024 15:36:47 +0800 Subject: [PATCH 09/11] test --- sdgx/data_models/combiner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdgx/data_models/combiner.py b/sdgx/data_models/combiner.py index 6a6ed808..22fcb5d1 100644 --- a/sdgx/data_models/combiner.py +++ b/sdgx/data_models/combiner.py @@ -35,7 +35,10 @@ class MetadataCombiner(BaseModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.relationships = [] + if kwargs['relationships']: + self.relationships = kwargs['relationships'] + else: + self.relationships = [] def check(self): """Do necessary checks: From 6fd80b616c338297214b041d8157953386223bc0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 07:41:13 +0000 Subject: [PATCH 10/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sdgx/data_models/combiner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdgx/data_models/combiner.py b/sdgx/data_models/combiner.py index 22fcb5d1..bf9d9941 100644 --- a/sdgx/data_models/combiner.py +++ b/sdgx/data_models/combiner.py @@ -35,8 +35,8 @@ class MetadataCombiner(BaseModel): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if kwargs['relationships']: - self.relationships = kwargs['relationships'] + if kwargs["relationships"]: + self.relationships = kwargs["relationships"] else: self.relationships = [] From c52474fb7ab7f621b4590890af7be47d088b80ee Mon Sep 17 00:00:00 2001 From: MoooCat <141886018+MooooCat@users.noreply.github.com> Date: Thu, 23 May 2024 22:51:34 +0800 Subject: [PATCH 11/11] Apply suggestions from code review Co-authored-by: Zhongsheng Ji <9573586@qq.com> --- sdgx/data_models/combiner.py | 7 ++----- sdgx/data_models/metadata.py | 2 -- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/sdgx/data_models/combiner.py b/sdgx/data_models/combiner.py index bf9d9941..90df9bf5 100644 --- a/sdgx/data_models/combiner.py +++ b/sdgx/data_models/combiner.py @@ -33,12 +33,9 @@ class MetadataCombiner(BaseModel): relationships: List[Relationship] = [] - def __init__(self, *args, **kwargs): + def __init__(self, relationships=None, *args, **kwargs): super().__init__(*args, **kwargs) - if kwargs["relationships"]: - self.relationships = kwargs["relationships"] - else: - self.relationships = [] + self.relationships = relationships or [] def check(self): """Do necessary checks: diff --git a/sdgx/data_models/metadata.py b/sdgx/data_models/metadata.py index 01f73174..e28e53f1 100644 --- a/sdgx/data_models/metadata.py +++ b/sdgx/data_models/metadata.py @@ -103,7 +103,6 @@ def __init__(self, **data: Any): self.primary_key = "default" self.alternate_keys = [] - # ---------------------------------------------------------------------- def add_column(self, column_name, **kwargs): """Add a column to the ``SingleTableMetadata``. @@ -245,7 +244,6 @@ def load_from_dict(cls, metadata_dict): return instance - # ---------------------------------------------------------------------- @property def tag_fields(self) -> Iterable[str]: