Skip to content

Commit

Permalink
Ensure that primary and sequence keys cannot be the same (#2106)
Browse files Browse the repository at this point in the history
  • Loading branch information
lajohn4747 authored Jul 8, 2024
1 parent b37827b commit b68ddd3
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 3 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,6 @@ select = [
"PD"
]
ignore = [
"E501",
# pydocstyle
"D107", # Missing docstring in __init__
"D417", # Missing argument descriptions in the docstring, this is a bug from pydocstyle: https://github.com/PyCQA/pydocstyle/issues/449
Expand Down
6 changes: 4 additions & 2 deletions sdv/constraints/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,17 @@ def _validate_inputs(cls, **kwargs):
if missing_values:
errors.append(
ValueError(
f'Missing required values {missing_values} in {article} {constraint} constraint.'
f'Missing required values {missing_values} '
f'in {article} {constraint} constraint.'
)
)

invalid_vals = set(kwargs) - set(args)
if invalid_vals:
errors.append(
ValueError(
f'Invalid values {invalid_vals} are present in {article} {constraint} constraint.'
f'Invalid values {invalid_vals} are present '
f'in {article} {constraint} constraint.'
)
)

Expand Down
8 changes: 8 additions & 0 deletions sdv/metadata/single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,14 @@ def _validate_key(self, column_name, key_type):
raise InvalidMetadataError(f"'{key_type}_key' must be a string.")

keys = {column_name} if isinstance(column_name, str) else set(column_name)
setting_sequence_as_primary = key_type == 'primary' and column_name == self.sequence_key
setting_primary_as_sequence = key_type == 'sequence' and column_name == self.primary_key
if setting_sequence_as_primary or setting_primary_as_sequence:
raise InvalidMetadataError(
f'The column ({column_name}) cannot be set as {key_type}_key as it is already '
f"set as the {'sequence' if key_type == 'primary' else 'primary'}_key."
)

invalid_ids = keys - set(self.columns)
if invalid_ids:
raise InvalidMetadataError(
Expand Down
64 changes: 64 additions & 0 deletions tests/integration/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,3 +487,67 @@ def test_column_relationship_validation():
# Run and Assert
with pytest.raises(InvalidMetadataError, match=expected_message):
metadata.validate()


def test_metadata_validate_same_sequence_primary():
"""Test metadata validation when both primary and sequence keys are the same."""
# Setup
metadata = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
'primary_key': 'A',
'sequence_key': 'A',
})

expected_message = re.escape(
'The following errors were found in the metadata:\n\n'
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.\n'
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)

# Run and Assert
with pytest.raises(InvalidMetadataError, match=expected_message):
metadata.validate()


def test_metadata_set_same_sequence_primary():
"""Test metadata throws error when setting the sequence and primary keys to be the same."""
# Setup
metadata_sequence = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
})

# Run and Assert
metadata_sequence.set_sequence_key('A')
error_msg_sequence = re.escape(
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.'
)
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
metadata_sequence.set_primary_key('A')

# Setup primary first
metadata_primary = SingleTableMetadata.load_from_dict({
'columns': {
'A': {'sdtype': 'id'},
'B': {'sdtype': 'datetime', 'datetime_format': '%Y-%m-%d'},
'C': {'sdtype': 'numerical'},
'D': {'sdtype': 'categorical'},
},
})

# Run and Assert
metadata_primary.set_primary_key('A')
error_msg_sequence = re.escape(
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
metadata_primary.set_sequence_key('A')
21 changes: 21 additions & 0 deletions tests/unit/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,27 @@ def test__validate_key_dataype_invalid_tuple(self):
# Assert
assert out is False

def test__validate_key_sequence_and_primary_key_same(self):
"""Test ``_validate_key`` for a column used as both sequence and primary keys."""
# Setup
instance_primary = SingleTableMetadata()
instance_primary.primary_key = 'A'
error_msg_primary = re.escape(
'The column (A) cannot be set as sequence_key as it is already set as the primary_key.'
)

instance_sequence = SingleTableMetadata()
instance_sequence.sequence_key = 'A'
error_msg_sequence = re.escape(
'The column (A) cannot be set as primary_key as it is already set as the sequence_key.'
)

# Run and Assert
with pytest.raises(InvalidMetadataError, match=error_msg_primary):
instance_primary._validate_key('A', 'sequence')
with pytest.raises(InvalidMetadataError, match=error_msg_sequence):
instance_sequence._validate_key('A', 'primary')

def test_set_primary_key_validation_dtype(self):
"""Test that ``set_primary_key`` crashes for invalid arguments.
Expand Down

0 comments on commit b68ddd3

Please sign in to comment.