Skip to content

Commit

Permalink
Merge pull request #39 from awslabs/dev
Browse files Browse the repository at this point in the history
Support for adding a list of constraints
  • Loading branch information
jaoanan1126 authored May 20, 2021
2 parents 6ee23cf + b846c54 commit 2ee4f70
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 5 deletions.
10 changes: 7 additions & 3 deletions pydeequ/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,20 @@ def __init__(self, spark_session: SparkSession, level: CheckLevel, description:
getattr(self._check_java_class, 'apply$default$3')()
)
self.constraints = constraints if constraints else []
for constraint in self.constraints:
self.addConstraint(constraint)

def addConstraints(self, constraints: list):
    """Append several constraints to this Check in a single call.

    Each constraint is added to the constraints list, and the underlying
    Java Check object is taken from it so later additions accumulate on
    the same check.

    :param list constraints: constraints to append to the constraints list.
    """
    for new_constraint in constraints:
        self.constraints.append(new_constraint)
        # Adopt the wrapped Java Check carrying the accumulated constraints.
        self._Check = new_constraint._Check

def addConstraint(self, constraint):
    """Append a single constraint to this Check.

    Mutates this Check in place: the constraint is added to the
    constraints list and the underlying Java Check object is adopted
    from it so subsequently added constraints accumulate.

    :param Constraint constraint: new constraint to be added.
    """
    # NOTE(review): the stale `raise NotImplementedError(...)` left over
    # from the previous stub made the two lines below unreachable; it has
    # been removed so the method actually registers the constraint.
    self.constraints.append(constraint)
    self._Check = constraint._Check

def addFilterableContstraint(self, creationFunc):
""" Adds a constraint that can subsequently be replaced with a filtered version
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
def setup_package():
setup(
name='pydeequ',
version="0.1.5",
version="0.1.7",
author="PyDeequ Developers",
author_email="cghyzel@amazon.com",
author_email="calviwan@amazon.com",
description="Python API for Deequ",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
22 changes: 22 additions & 0 deletions tests/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@ def tearDownClass(cls):
cls.spark.sparkContext._gateway.shutdown_callback_server()
cls.spark.stop()

def testCheckConstraints(self):
    """Constraints added through addConstraints/addConstraint must produce
    the same verification constraint strings as the fluent chained style."""
    list_style = Check(self.spark, CheckLevel.Warning, "test list constraints")
    list_style.addConstraints([list_style.isComplete('c'),
                               list_style.isUnique('b')])
    list_style.addConstraint(list_style.hasSize(lambda x: x == 3.0))

    list_outcome = VerificationSuite(self.spark).onData(self.df) \
        .addCheck(list_style) \
        .run()

    list_frame = VerificationResult.checkResultsAsDataFrame(self.spark, list_outcome)

    chained_style = Check(self.spark, CheckLevel.Warning, "test list constraints")

    chained_outcome = VerificationSuite(self.spark).onData(self.df) \
        .addCheck(chained_style.isComplete('c').isUnique('b').hasSize(lambda x: x == 3.0)) \
        .run()

    chained_frame = VerificationResult.checkResultsAsDataFrame(self.spark, chained_outcome)

    self.assertEqual(list_frame.select('constraint').collect(),
                     chained_frame.select('constraint').collect())

def test_initializer(self):
# TODO verify that it does more than run
check = Check(self.spark, CheckLevel.Warning, "test initializer")
Expand Down

0 comments on commit 2ee4f70

Please sign in to comment.