From 4ea93b0bbbd96a6f743e4c260f5b717062a0f0bc Mon Sep 17 00:00:00 2001
From: Zhongsheng Ji <9573586@qq.com>
Date: Sat, 23 Dec 2023 10:29:26 +0800
Subject: [PATCH] CLI for single table synthesizer (#86)
- Intro `Data Exporter` for exporting sampled data to data sources
- CLI updates for synthesizer
---
.github/workflows/extension.yml | 1 +
.../api_reference/data_exporters/base.rst | 9 +
.../data_exporters/csv_exporter.rst | 10 +
.../data_exporters/extension.rst | 11 +
.../api_reference/data_exporters/index.rst | 24 ++
.../api_reference/data_exporters/manager.rst | 9 +
docs/source/api_reference/index.rst | 1 +
.../developer_guides/extension/index.rst | 20 +-
docs/source/user_guides/cli.rst | 22 +
.../dummyexporter/dummyexporter/__init__.py | 1 +
.../dummyexporter/dummyexporter.py | 15 +
.../extension/dummyexporter/pyproject.toml | 27 ++
.../tests/test_registed_exporter.py | 16 +
sdgx/cli/main.py | 398 +++++++++++++++---
sdgx/cli/message.py | 48 +++
sdgx/cli/models.py | 0
sdgx/cli/utils.py | 28 ++
.../__init__.py} | 0
sdgx/data_exporters/base.py | 10 +
sdgx/data_exporters/csv_exporter.py | 41 ++
sdgx/data_exporters/extension.py | 57 +++
sdgx/data_exporters/manager.py | 25 ++
sdgx/exceptions.py | 48 ++-
sdgx/log.py | 9 +-
sdgx/models/manager.py | 4 +-
sdgx/models/ml/single_table/ctgan.py | 4 +-
sdgx/synthesizer.py | 58 +--
tests/cli/test_cli.py | 97 +++++
tests/cli/test_message.py | 41 ++
tests/conftest.py | 5 +-
tests/dataloader/conftest.py | 0
tests/dataloader/test_cacher.py | 2 -
tests/manager/test_exporter.py | 22 +
tests/models/test_copula.py | 4 +-
tests/test_csv_exporter.py | 37 ++
tests/test_synthesizer.py | 2 +-
36 files changed, 1018 insertions(+), 88 deletions(-)
create mode 100644 docs/source/api_reference/data_exporters/base.rst
create mode 100644 docs/source/api_reference/data_exporters/csv_exporter.rst
create mode 100644 docs/source/api_reference/data_exporters/extension.rst
create mode 100644 docs/source/api_reference/data_exporters/index.rst
create mode 100644 docs/source/api_reference/data_exporters/manager.rst
create mode 100644 example/extension/dummyexporter/dummyexporter/__init__.py
create mode 100644 example/extension/dummyexporter/dummyexporter/dummyexporter.py
create mode 100644 example/extension/dummyexporter/pyproject.toml
create mode 100644 example/extension/dummyexporter/tests/test_registed_exporter.py
create mode 100644 sdgx/cli/message.py
delete mode 100644 sdgx/cli/models.py
create mode 100644 sdgx/cli/utils.py
rename sdgx/{cli/exporter.py => data_exporters/__init__.py} (100%)
create mode 100644 sdgx/data_exporters/base.py
create mode 100644 sdgx/data_exporters/csv_exporter.py
create mode 100644 sdgx/data_exporters/extension.py
create mode 100644 sdgx/data_exporters/manager.py
create mode 100644 tests/cli/test_cli.py
create mode 100644 tests/cli/test_message.py
delete mode 100644 tests/dataloader/conftest.py
create mode 100644 tests/manager/test_exporter.py
create mode 100644 tests/test_csv_exporter.py
diff --git a/.github/workflows/extension.yml b/.github/workflows/extension.yml
index e2b9c37b..3fd2ab78 100644
--- a/.github/workflows/extension.yml
+++ b/.github/workflows/extension.yml
@@ -27,6 +27,7 @@ jobs:
python -m pip install -e .[test]
- name: Install all packages in example/extension
run: |
+ python -m pip install -e example/extension/dummyexporter[test]
python -m pip install -e example/extension/dummymetadatainspector[test]
python -m pip install -e example/extension/dummycache[test]
python -m pip install -e example/extension/dummydataconnector[test]
diff --git a/docs/source/api_reference/data_exporters/base.rst b/docs/source/api_reference/data_exporters/base.rst
new file mode 100644
index 00000000..0ac564dc
--- /dev/null
+++ b/docs/source/api_reference/data_exporters/base.rst
@@ -0,0 +1,9 @@
+Base Class for DataExporter
+===========================
+
+.. autoclass:: sdgx.data_exporters.base.DataExporter
+ :members:
+ :undoc-members:
+ :inherited-members:
+ :show-inheritance:
+ :private-members:
diff --git a/docs/source/api_reference/data_exporters/csv_exporter.rst b/docs/source/api_reference/data_exporters/csv_exporter.rst
new file mode 100644
index 00000000..1c1e2725
--- /dev/null
+++ b/docs/source/api_reference/data_exporters/csv_exporter.rst
@@ -0,0 +1,10 @@
+CsvExporter
+=====================================
+
+
+.. autoclass:: sdgx.data_exporters.csv_exporter.CsvExporter
+ :members:
+ :undoc-members:
+ :inherited-members:
+ :show-inheritance:
+ :private-members:
diff --git a/docs/source/api_reference/data_exporters/extension.rst b/docs/source/api_reference/data_exporters/extension.rst
new file mode 100644
index 00000000..65b29e46
--- /dev/null
+++ b/docs/source/api_reference/data_exporters/extension.rst
@@ -0,0 +1,11 @@
+.. _api_reference/data-exporters-extension:
+
+Extension hookspec
+============================
+
+.. automodule:: sdgx.data_exporters.extension
+ :members:
+ :undoc-members:
+ :inherited-members:
+ :show-inheritance:
+ :private-members:
diff --git a/docs/source/api_reference/data_exporters/index.rst b/docs/source/api_reference/data_exporters/index.rst
new file mode 100644
index 00000000..ffd27472
--- /dev/null
+++ b/docs/source/api_reference/data_exporters/index.rst
@@ -0,0 +1,24 @@
+Data Exporter
+========================================================
+
+.. toctree::
+ :maxdepth: 1
+
+ Base Class for DataExporter <base>
+
+Built-in DataExporter
+-----------------------------
+
+.. toctree::
+ :maxdepth: 2
+
+ CsvExporter <csv_exporter>
+
+Custom DataExporter Relevant
+-----------------------------
+
+.. toctree::
+ :maxdepth: 2
+
+ Extension hookspec <extension>
+ DataExporterManager <manager>
diff --git a/docs/source/api_reference/data_exporters/manager.rst b/docs/source/api_reference/data_exporters/manager.rst
new file mode 100644
index 00000000..ab96c2ac
--- /dev/null
+++ b/docs/source/api_reference/data_exporters/manager.rst
@@ -0,0 +1,9 @@
+DataExporterManager
+=================================
+
+.. autoclass:: sdgx.data_exporters.manager.DataExporterManager
+ :members:
+ :undoc-members:
+ :inherited-members:
+ :show-inheritance:
+ :private-members:
diff --git a/docs/source/api_reference/index.rst b/docs/source/api_reference/index.rst
index e08737a8..b234122f 100644
--- a/docs/source/api_reference/index.rst
+++ b/docs/source/api_reference/index.rst
@@ -14,6 +14,7 @@ API Reference
Data Processor
Models
Metadata and Inspectors
+ Data Exporter
Manager
Exceptions
Utils
diff --git a/docs/source/developer_guides/extension/index.rst b/docs/source/developer_guides/extension/index.rst
index e028ddd2..885c6053 100644
--- a/docs/source/developer_guides/extension/index.rst
+++ b/docs/source/developer_guides/extension/index.rst
@@ -23,8 +23,18 @@ View latest extension example on `GitHub `
-- :ref:`Data Connector `
-- :ref:`Data Processor `
-- :ref:`Inspector for Metadata `
-- :ref:`Model `
+- :ref:`API Reference for extended Data Connector `:
+ :ref:`Data Connector ` is used to connect to data sources.
+- :ref:`API Reference for extended Cacher for DataLoader `:
+ :ref:`Cacher ` is used for improving performance,
+ reducing network overhead and supporting large datasets.
+- :ref:`API Reference for extended Data Processor `:
+ :ref:`Data Processor ` is used to pre-process and post-process data.
+ It is useful for business logic.
+- :ref:`API Reference for extended Inspector for Metadata `:
+ :ref:`Inspector ` is used to extract metadata such as patterns, types, etc. from raw data.
+- :ref:`API Reference for extended Model `:
+ :ref:`Model `, the model fitted by processed data and used to generate synthetic data.
+- :ref:`API Reference for extended Data Exporter `:
+ :ref:`Data Exporter ` is used to export data to somewhere.
+ Use it from the CLI or as a library to save your processed data or synthetic data.
diff --git a/docs/source/user_guides/cli.rst b/docs/source/user_guides/cli.rst
index 2c91d74a..9001cd0d 100644
--- a/docs/source/user_guides/cli.rst
+++ b/docs/source/user_guides/cli.rst
@@ -1,2 +1,24 @@
Command Line Interface
==================================================
+
+The Command Line Interface (CLI) is designed to simplify the usage of SDG and enable other programs to use SDG in a more convenient way.
+
+There are two main commands in the CLI:
+
+- ``fit``: For fitting, finetuning, retraining... the model, which will save the final model to a specified path.
+- ``sample``: Load existing model and sample synthetic data.
+
+Because SDG supports a plug-in system, users can list all available components via the ``list-{component}`` command.
+
+.. Note::
+
+ If you want to use SDG as a library, please refer to :ref:`Use Synthetic Data Generator as a library