diff --git a/tests/__init__.py b/tests/__init__.py index 2e81d040..ee69de61 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,4 +1,5 @@ """EthicML""" -from . import loading_data_test, metrics +from . import metrics +from .data import loading_data_test from .metrics import nonparamaterized_metric_test diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data_omegaconf_test.py b/tests/data/data_omegaconf_test.py similarity index 100% rename from tests/data_omegaconf_test.py rename to tests/data/data_omegaconf_test.py diff --git a/tests/dataset_modification_test.py b/tests/data/dataset_modification_test.py similarity index 100% rename from tests/dataset_modification_test.py rename to tests/data/dataset_modification_test.py diff --git a/tests/loading_data_test.py b/tests/data/loading_data_test.py similarity index 51% rename from tests/loading_data_test.py rename to tests/data/loading_data_test.py index cca5a07f..73a38188 100644 --- a/tests/loading_data_test.py +++ b/tests/data/loading_data_test.py @@ -1,6 +1,7 @@ """Test the loading data capability.""" from pathlib import Path from typing import Callable, NamedTuple +from typing_extensions import Final import pandas as pd import pytest @@ -41,549 +42,549 @@ def idfn(val: DT): return f"{val}" -@pytest.mark.parametrize( - "dt", - [ - DT( - dataset=Admissions(), - samples=43_303, - x_features=9, - discrete_features=0, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Admissions Gender", - sum_s=22_335, - sum_y=20_263, - ), - DT( - dataset=Admissions(split=Admissions.Splits.GENDER, invert_s=True), - samples=43_303, - x_features=9, - discrete_features=0, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Admissions Gender", - sum_s=43_303 - 22_335, - sum_y=20_263, - ), - DT( - dataset=Adult(), - samples=45_222, - x_features=101, - discrete_features=96, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Sex", - sum_s=30_527, - sum_y=11_208, - ), - DT( - dataset=Adult(split=Adult.Splits.SEX, binarize_nationality=True), - samples=45_222, - x_features=62, - discrete_features=57, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Sex, binary nationality", - sum_s=30_527, - sum_y=11_208, - ), - DT( - dataset=Adult(split=Adult.Splits.SEX, binarize_race=True), - samples=45_222, - x_features=98, - discrete_features=93, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Sex, binary race", - sum_s=30_527, - sum_y=11_208, - ), - DT( - dataset=Adult(split=Adult.Splits.SEX, binarize_nationality=True, binarize_race=True), - samples=45_222, - x_features=59, - discrete_features=54, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Sex, binary nationality, binary race", - sum_s=30_527, - sum_y=11_208, - ), - DT( - dataset=Adult(split=Adult.Splits.SEX), - samples=45_222, - x_features=101, - discrete_features=96, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Sex", - sum_s=30_527, - sum_y=11_208, - ), - DT( - dataset=em.Adult(split=em.Adult.Splits.RACE), - samples=45_222, - x_features=98, - discrete_features=93, - s_features=1, - num_sens=5, - y_features=1, - num_labels=2, - name="Adult Race", - sum_s=166_430, - sum_y=11_208, - ), - DT( - dataset=Adult(split=Adult.Splits.RACE), - samples=45_222, - x_features=98, - discrete_features=93, - s_features=1, - num_sens=5, - y_features=1, - num_labels=2, - name="Adult Race", - sum_s=166_430, - sum_y=11_208, - ), - DT( - dataset=em.Adult(split=Adult.Splits.RACE_BINARY), - samples=45_222, - x_features=98, - discrete_features=93, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Adult Race-Binary", - sum_s=38_903, - sum_y=11_208, - ), - DT( - dataset=em.Adult(split=Adult.Splits.NATIONALITY), - samples=45_222, - x_features=62, - discrete_features=57, - s_features=1, - num_sens=41, - y_features=1, - num_labels=2, - name="Adult Nationality", - sum_s=1_646_127, - sum_y=11_208, - ), - DT( - dataset=em.Adult(split=Adult.Splits.EDUCTAION), - samples=45_222, - x_features=86, - discrete_features=82, - s_features=1, - num_sens=3, - y_features=1, - num_labels=2, - name="Adult Education", - sum_s=50_979, - sum_y=11_208, - ), - DT( - dataset=em.Compas(), - samples=6_167, - x_features=400, - discrete_features=395, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Compas Sex", - sum_s=4_994, - sum_y=2_809, - ), - DT( - dataset=Compas(split=Compas.Splits.SEX), - samples=6_167, - x_features=400, - discrete_features=395, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Compas Sex", - sum_s=4_994, - sum_y=2_809, - ), - DT( - dataset=em.Compas(split=Compas.Splits.RACE), - samples=6_167, - x_features=400, - discrete_features=395, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Compas Race", - sum_s=2_100, - sum_y=2_809, - ), - DT( - dataset=em.Compas(split=Compas.Splits.RACE_SEX), - samples=6_167, - x_features=399, - discrete_features=394, - s_features=1, - num_sens=4, - y_features=1, - num_labels=2, - name="Compas Race-Sex", - sum_s=9_194, - sum_y=2_809, - ), - DT( - dataset=Credit(), - samples=30_000, - x_features=29, - discrete_features=9, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Credit Sex", - sum_s=18_112, - sum_y=6_636, - ), - DT( - dataset=Credit(split=Credit.Splits.SEX), - samples=30_000, - x_features=29, - discrete_features=9, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Credit Sex", - sum_s=18_112, - sum_y=6_636, - ), - DT( - dataset=Crime(), - samples=1_993, - x_features=136, - discrete_features=46, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Crime Race-Binary", - sum_s=970, - sum_y=653, - ), - DT( - dataset=Crime(split=Crime.Splits.RACE_BINARY), - samples=1_993, - x_features=136, - discrete_features=46, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Crime Race-Binary", - sum_s=970, - sum_y=653, - ), - DT( - dataset=German(), - samples=1_000, - x_features=57, - discrete_features=51, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="German Sex", - sum_s=690, - sum_y=300, - ), - DT( - dataset=German(split=German.Splits.SEX), - samples=1_000, - x_features=57, - discrete_features=51, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="German Sex", - sum_s=690, - sum_y=300, - ), - DT( - dataset=em.Health(), - samples=171_067, - x_features=130, - discrete_features=12, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Health", - sum_s=76_450, - sum_y=54_052, - ), - DT( - dataset=em.Health(split=em.Health.Splits.SEX), - samples=171_067, - x_features=130, - discrete_features=12, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Health", - sum_s=76_450, - sum_y=54_052, - ), - DT( - dataset=em.Law(split=em.Law.Splits.SEX), - samples=21_791, - x_features=3, - discrete_features=0, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Law Sex", - sum_s=9_537, - sum_y=19_360, - ), - DT( - dataset=em.Law(split=em.Law.Splits.RACE), - samples=21_791, - x_features=3, - discrete_features=0, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Law Race", - sum_s=18_285, - sum_y=19_360, - ), - DT( - dataset=em.Law(split=em.Law.Splits.SEX_RACE), - samples=21_791, - x_features=3, - discrete_features=0, - s_features=1, - num_sens=16, - y_features=1, - num_labels=2, - name="Law Sex-Race", - sum_s=282_635, - sum_y=19_360, - ), - DT( - dataset=em.Lipton(), - samples=2_000, - x_features=2, - discrete_features=0, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Lipton", - sum_s=989, - sum_y=-562, - ), - DT( - dataset=em.NonBinaryToy(), - samples=400, - x_features=10, - discrete_features=8, - s_features=1, - num_sens=2, - y_features=1, - num_labels=5, - name="NonBinaryToy", - sum_s=200, - sum_y=826, - ), - DT( - dataset=em.Nursery(), - samples=12960, - x_features=22, - discrete_features=21, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Nursery Finance", - sum_s=6480, - sum_y=4320, - ), - DT( - dataset=em.Sqf(), - samples=12_347, - x_features=145, - discrete_features=139, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="SQF Sex", - sum_s=11_394, - sum_y=1_289, - ), - DT( - dataset=em.Sqf(split=em.Sqf.Splits.SEX), - samples=12_347, - x_features=145, - discrete_features=139, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="SQF Sex", - sum_s=11_394, - sum_y=1_289, - ), - DT( - dataset=em.Sqf(split=em.Sqf.Splits.RACE), - samples=12_347, - x_features=145, - discrete_features=139, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="SQF Race", - sum_s=6_471, - sum_y=1_289, - ), - DT( - dataset=em.Sqf(split=em.Sqf.Splits.RACE_SEX), - samples=12_347, - x_features=144, - discrete_features=138, - s_features=1, - num_sens=4, - y_features=1, - num_labels=2, - name="SQF Race-Sex", - sum_s=24_336, - sum_y=1_289, - ), - DT( - dataset=em.Toy(), - samples=400, - x_features=10, - discrete_features=8, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="Toy", - sum_s=200, - sum_y=231, - ), - DT( - dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["AL"]), - samples=22_268, - x_features=45, - discrete_features=40, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="ACS_Income_2018_1_AL_Sex", - sum_s=11_622, - sum_y=6_924, - ), - DT( - dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["PA"]), - samples=68_308, - x_features=45, - discrete_features=40, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="ACS_Income_2018_1_PA_Sex", - sum_s=35_480, - sum_y=24_385, - ), - DT( - dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["AL", "PA"]), - samples=90_576, - x_features=45, - discrete_features=40, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="ACS_Income_2018_1_AL_PA_Sex", - sum_s=47_102, - sum_y=31_309, - ), - DT( - dataset=em.AcsIncome( - root=Path("~/Data"), year="2018", horizon=1, states=["AL"], split="Race" - ), - samples=22_268, - x_features=38, - discrete_features=33, - s_features=1, - num_sens=9, - y_features=1, - num_labels=2, - name="ACS_Income_2018_1_AL_Race", - sum_s=9_947, - sum_y=6_924, - ), - DT( - dataset=em.AcsIncome( - root=Path("~/Data"), year="2018", horizon=1, states=["AL"], split="Sex-Race" - ), - samples=22_268, - x_features=36, - discrete_features=31, - s_features=1, - num_sens=17, - y_features=1, - num_labels=2, - name="ACS_Income_2018_1_AL_Sex-Race", - sum_s=31_516, - sum_y=6_924, - ), - DT( - dataset=em.AcsEmployment(root=Path("~/Data"), year="2018", horizon=1, states=["AL"]), - samples=47_777, - x_features=90, - discrete_features=89, - s_features=1, - num_sens=2, - y_features=1, - num_labels=2, - name="ACS_Employment_2018_1_AL_Sex", - sum_s=22_972, - sum_y=19_575, - ), - ], - ids=idfn, -) +dts: Final = [ + DT( + dataset=Admissions(), + samples=43_303, + x_features=9, + discrete_features=0, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Admissions Gender", + sum_s=22_335, + sum_y=20_263, + ), + DT( + dataset=Admissions(split=Admissions.Splits.GENDER, invert_s=True), + samples=43_303, + x_features=9, + discrete_features=0, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Admissions Gender", + sum_s=43_303 - 22_335, + sum_y=20_263, + ), + DT( + dataset=Adult(), + samples=45_222, + x_features=101, + discrete_features=96, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Sex", + sum_s=30_527, + sum_y=11_208, + ), + DT( + dataset=Adult(split=Adult.Splits.SEX, binarize_nationality=True), + samples=45_222, + x_features=62, + discrete_features=57, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Sex, binary nationality", + sum_s=30_527, + sum_y=11_208, + ), + DT( + dataset=Adult(split=Adult.Splits.SEX, binarize_race=True), + samples=45_222, + x_features=98, + discrete_features=93, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Sex, binary race", + sum_s=30_527, + sum_y=11_208, + ), + DT( + dataset=Adult(split=Adult.Splits.SEX, binarize_nationality=True, binarize_race=True), + samples=45_222, + x_features=59, + discrete_features=54, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Sex, binary nationality, binary race", + sum_s=30_527, + sum_y=11_208, + ), + DT( + dataset=Adult(split=Adult.Splits.SEX), + samples=45_222, + x_features=101, + discrete_features=96, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Sex", + sum_s=30_527, + sum_y=11_208, + ), + DT( + dataset=em.Adult(split=em.Adult.Splits.RACE), + samples=45_222, + x_features=98, + discrete_features=93, + s_features=1, + num_sens=5, + y_features=1, + num_labels=2, + name="Adult Race", + sum_s=166_430, + sum_y=11_208, + ), + DT( + dataset=Adult(split=Adult.Splits.RACE), + samples=45_222, + x_features=98, + discrete_features=93, + s_features=1, + num_sens=5, + y_features=1, + num_labels=2, + name="Adult Race", + sum_s=166_430, + sum_y=11_208, + ), + DT( + dataset=em.Adult(split=Adult.Splits.RACE_BINARY), + samples=45_222, + x_features=98, + discrete_features=93, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Adult Race-Binary", + sum_s=38_903, + sum_y=11_208, + ), + DT( + dataset=em.Adult(split=Adult.Splits.NATIONALITY), + samples=45_222, + x_features=62, + discrete_features=57, + s_features=1, + num_sens=41, + y_features=1, + num_labels=2, + name="Adult Nationality", + sum_s=1_646_127, + sum_y=11_208, + ), + DT( + dataset=em.Adult(split=Adult.Splits.EDUCTAION), + samples=45_222, + x_features=86, + discrete_features=82, + s_features=1, + num_sens=3, + y_features=1, + num_labels=2, + name="Adult Education", + sum_s=50_979, + sum_y=11_208, + ), + DT( + dataset=em.Compas(), + samples=6_167, + x_features=400, + discrete_features=395, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Compas Sex", + sum_s=4_994, + sum_y=2_809, + ), + DT( + dataset=Compas(split=Compas.Splits.SEX), + samples=6_167, + x_features=400, + discrete_features=395, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Compas Sex", + sum_s=4_994, + sum_y=2_809, + ), + DT( + dataset=em.Compas(split=Compas.Splits.RACE), + samples=6_167, + x_features=400, + discrete_features=395, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Compas Race", + sum_s=2_100, + sum_y=2_809, + ), + DT( + dataset=em.Compas(split=Compas.Splits.RACE_SEX), + samples=6_167, + x_features=399, + discrete_features=394, + s_features=1, + num_sens=4, + y_features=1, + num_labels=2, + name="Compas Race-Sex", + sum_s=9_194, + sum_y=2_809, + ), + DT( + dataset=Credit(), + samples=30_000, + x_features=29, + discrete_features=9, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Credit Sex", + sum_s=18_112, + sum_y=6_636, + ), + DT( + dataset=Credit(split=Credit.Splits.SEX), + samples=30_000, + x_features=29, + discrete_features=9, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Credit Sex", + sum_s=18_112, + sum_y=6_636, + ), + DT( + dataset=Crime(), + samples=1_993, + x_features=136, + discrete_features=46, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Crime Race-Binary", + sum_s=970, + sum_y=653, + ), + DT( + dataset=Crime(split=Crime.Splits.RACE_BINARY), + samples=1_993, + x_features=136, + discrete_features=46, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Crime Race-Binary", + sum_s=970, + sum_y=653, + ), + DT( + dataset=German(), + samples=1_000, + x_features=57, + discrete_features=51, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="German Sex", + sum_s=690, + sum_y=300, + ), + DT( + dataset=German(split=German.Splits.SEX), + samples=1_000, + x_features=57, + discrete_features=51, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="German Sex", + sum_s=690, + sum_y=300, + ), + DT( + dataset=em.Health(), + samples=171_067, + x_features=130, + discrete_features=12, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Health", + sum_s=76_450, + sum_y=54_052, + ), + DT( + dataset=em.Health(split=em.Health.Splits.SEX), + samples=171_067, + x_features=130, + discrete_features=12, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Health", + sum_s=76_450, + sum_y=54_052, + ), + DT( + dataset=em.Law(split=em.Law.Splits.SEX), + samples=21_791, + x_features=3, + discrete_features=0, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Law Sex", + sum_s=9_537, + sum_y=19_360, + ), + DT( + dataset=em.Law(split=em.Law.Splits.RACE), + samples=21_791, + x_features=3, + discrete_features=0, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Law Race", + sum_s=18_285, + sum_y=19_360, + ), + DT( + dataset=em.Law(split=em.Law.Splits.SEX_RACE), + samples=21_791, + x_features=3, + discrete_features=0, + s_features=1, + num_sens=16, + y_features=1, + num_labels=2, + name="Law Sex-Race", + sum_s=282_635, + sum_y=19_360, + ), + DT( + dataset=em.Lipton(), + samples=2_000, + x_features=2, + discrete_features=0, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Lipton", + sum_s=989, + sum_y=-562, + ), + DT( + dataset=em.NonBinaryToy(), + samples=400, + x_features=10, + discrete_features=8, + s_features=1, + num_sens=2, + y_features=1, + num_labels=5, + name="NonBinaryToy", + sum_s=200, + sum_y=826, + ), + DT( + dataset=em.Nursery(), + samples=12960, + x_features=22, + discrete_features=21, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Nursery Finance", + sum_s=6480, + sum_y=4320, + ), + DT( + dataset=em.Sqf(), + samples=12_347, + x_features=145, + discrete_features=139, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="SQF Sex", + sum_s=11_394, + sum_y=1_289, + ), + DT( + dataset=em.Sqf(split=em.Sqf.Splits.SEX), + samples=12_347, + x_features=145, + discrete_features=139, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="SQF Sex", + sum_s=11_394, + sum_y=1_289, + ), + DT( + dataset=em.Sqf(split=em.Sqf.Splits.RACE), + samples=12_347, + x_features=145, + discrete_features=139, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="SQF Race", + sum_s=6_471, + sum_y=1_289, + ), + DT( + dataset=em.Sqf(split=em.Sqf.Splits.RACE_SEX), + samples=12_347, + x_features=144, + discrete_features=138, + s_features=1, + num_sens=4, + y_features=1, + num_labels=2, + name="SQF Race-Sex", + sum_s=24_336, + sum_y=1_289, + ), + DT( + dataset=em.Toy(), + samples=400, + x_features=10, + discrete_features=8, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="Toy", + sum_s=200, + sum_y=231, + ), + DT( + dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["AL"]), + samples=22_268, + x_features=45, + discrete_features=40, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="ACS_Income_2018_1_AL_Sex", + sum_s=11_622, + sum_y=6_924, + ), + DT( + dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["PA"]), + samples=68_308, + x_features=45, + discrete_features=40, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="ACS_Income_2018_1_PA_Sex", + sum_s=35_480, + sum_y=24_385, + ), + DT( + dataset=em.AcsIncome(root=Path("~/Data"), year="2018", horizon=1, states=["AL", "PA"]), + samples=90_576, + x_features=45, + discrete_features=40, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="ACS_Income_2018_1_AL_PA_Sex", + sum_s=47_102, + sum_y=31_309, + ), + DT( + dataset=em.AcsIncome( + root=Path("~/Data"), year="2018", horizon=1, states=["AL"], split="Race" + ), + samples=22_268, + x_features=38, + discrete_features=33, + s_features=1, + num_sens=9, + y_features=1, + num_labels=2, + name="ACS_Income_2018_1_AL_Race", + sum_s=9_947, + sum_y=6_924, + ), + DT( + dataset=em.AcsIncome( + root=Path("~/Data"), year="2018", horizon=1, states=["AL"], split="Sex-Race" + ), + samples=22_268, + x_features=36, + discrete_features=31, + s_features=1, + num_sens=17, + y_features=1, + num_labels=2, + name="ACS_Income_2018_1_AL_Sex-Race", + sum_s=31_516, + sum_y=6_924, + ), + DT( + dataset=em.AcsEmployment(root=Path("~/Data"), year="2018", horizon=1, states=["AL"]), + samples=47_777, + x_features=90, + discrete_features=89, + s_features=1, + num_sens=2, + y_features=1, + num_labels=2, + name="ACS_Employment_2018_1_AL_Sex", + sum_s=22_972, + sum_y=19_575, + ), +] + + +@pytest.mark.parametrize("dt", dts, ids=idfn) +@pytest.mark.xdist_group("data_files") def test_data_shape(dt: DT): """Test loading data.""" data: DataTuple = dt.dataset.load()