Skip to content

Commit

Permalink
Merge pull request #860 from EpistasisLab/development
Browse files Browse the repository at this point in the history
TPOT 10.0.1
  • Loading branch information
weixuanfu authored Apr 19, 2019
2 parents 75dc2a0 + 43b30f6 commit b626271
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ build: false

environment:
matrix:
- PYTHON_VERSION: 3.6
- PYTHON_VERSION: 3.7
MINICONDA: C:/Miniconda36-x64
- PYTHON_VERSION: 2.7
MINICONDA: C:/Miniconda-x64
Expand Down
33 changes: 25 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
language: python
virtualenv:
system_site_packages: true
env:
matrix:
# let's start simple:
- PYTHON_VERSION="2.7" LATEST="true"
- PYTHON_VERSION="3.6" COVERAGE="true" LATEST="true"
- PYTHON_VERSION="3.6" LATEST="true"
matrix:
# let's start simple:
include:
- name: "Python 3.7 on Xenial Linux"
dist: xenial # required for Python >= 3.7
env: PYTHON_VERSION="3.7"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 3.7 on Xenial Linux with coverage"
dist: xenial # required for Python >= 3.7
env: PYTHON_VERSION="3.7" COVERAGE="true"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 2.7 on Xenial Linux"
dist: xenial
env: PYTHON_VERSION="2.7"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 3.7 on macOS"
os: osx
osx_image: xcode10.2 # Python 3.7.2 running on macOS 10.14.3
language: shell # 'language: python' is an error on Travis CI macOS
env: PYTHON_VERSION="3.7"
before_install:
- wget https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
install: source ./ci/.travis_install.sh
script: bash ./ci/.travis_test.sh
after_success:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Development status: [![Development Build Status - Mac/Linux](https://travis-ci.o
[![Development Coverage Status](https://coveralls.io/repos/github/EpistasisLab/tpot/badge.svg?branch=development)](https://coveralls.io/github/EpistasisLab/tpot?branch=development)

Package information: [![Python 2.7](https://img.shields.io/badge/python-2.7-blue.svg)](https://www.python.org/download/releases/2.7/)
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)
[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/)
[![License: LGPL v3](https://img.shields.io/badge/license-LGPL%20v3-blue.svg)](http://www.gnu.org/licenses/lgpl-3.0)
[![PyPI version](https://badge.fury.io/py/TPOT.svg)](https://badge.fury.io/py/TPOT)

Expand Down
29 changes: 8 additions & 21 deletions ci/.travis_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,24 @@ export CXX=g++

# Deactivate the travis-provided virtual environment and setup a
# conda-based environment instead
deactivate
# deactivate

# Use the miniconda installer for faster download / install of conda
# itself
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \
-O miniconda.sh
chmod +x miniconda.sh && ./miniconda.sh -b
export PATH=/home/travis/miniconda3/bin:$PATH

chmod +x miniconda.sh && ./miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda update --yes conda

# Configure the conda environment and put it in the path using the
# provided versions
if [[ "$LATEST" == "true" ]]; then
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy scipy scikit-learn cython pandas
else
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
scikit-learn=$SKLEARN_VERSION \
cython \
pandas
fi

source activate testenv
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy scipy scikit-learn cython pandas

if [[ "$LATEST" == "true" ]]; then
pip install deap
else
pip install deap==$DEAP_VERSION
fi
source activate testenv

pip install deap
pip install update_checker
pip install tqdm
pip install stopit
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def calculate_version():
Contact
=============
If you have any questions or comments about TPOT, please feel free to contact me via:
If you have any questions or comments about TPOT, please feel free to contact us via:
E-mail: rso@randalolson.com
E-mail: ttle@pennmedicine.upenn.edu or weixuanf@pennmedicine.upenn.edu
or Twitter: https://twitter.com/randal_olson
or Twitter: https://twitter.com/trang1618 or https://twitter.com/WeixuanFu
This project is hosted at https://github.com/EpistasisLab/tpot
''',
Expand Down Expand Up @@ -60,6 +60,7 @@ def calculate_version():
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Scientific/Engineering :: Artificial Intelligence'
],
keywords=['pipeline optimization', 'hyperparameter optimization', 'data science', 'machine learning', 'genetic programming', 'evolutionary computation'],
Expand Down
33 changes: 33 additions & 0 deletions tests/export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,39 @@ def test_export_pipeline_5():
assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)


def test_export_pipeline_6():
    """Assert that export_pipeline() generates compilable source code containing the given random_state and data_file_path.

    Builds a one-operator pipeline (KNeighborsClassifier), exports it with
    random_state=42 and data_file_path='test_path', and checks the generated
    source matches the expected template exactly — i.e. the custom data path
    is substituted into pd.read_csv and the seed into train_test_split.
    """

    # DEAP primitive-tree string form of the pipeline: a single
    # KNeighborsClassifier with fixed hyperparameters.
    pipeline_string = (
        'KNeighborsClassifier('
        'input_matrix, '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    # Expected output is compared byte-for-byte, so the template below must
    # stay exactly in sync with export_pipeline()'s generated code.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'].values, random_state=42)

exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators,
                                            tpot_obj._pset, random_state=42,
                                            data_file_path='test_path')


def test_operator_export():
"""Assert that a TPOT operator can export properly with a callable function as a parameter."""
assert list(TPOTSelectPercentile.arg_types) == TPOTSelectPercentile_args
Expand Down
2 changes: 1 addition & 1 deletion tpot/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
"""

__version__ = '0.10.0'
__version__ = '0.10.1'
8 changes: 6 additions & 2 deletions tpot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,13 +1083,16 @@ def _create_periodic_checkpoint_folder(self):
raise ValueError('Failed creating the periodic_checkpoint_folder:\n{}'.format(e))


def export(self, output_file_name):
def export(self, output_file_name, data_file_path=''):
"""Export the optimized pipeline as Python code.
Parameters
----------
output_file_name: string
String containing the path and file name of the desired output file
data_file_path: string (default: '')
By default, the path of the input dataset in the exported code is 'PATH/TO/DATA/FILE'.
If data_file_path is a non-empty string, that path is used instead.
Returns
-------
Expand All @@ -1103,7 +1106,8 @@ def export(self, output_file_name):
to_write = export_pipeline(self._optimized_pipeline,
self.operators, self._pset,
self._imputed, self._optimized_pipeline_score,
self.random_state)
self.random_state,
data_file_path=data_file_path)

with open(output_file_name, 'w') as output_file:
output_file.write(to_write)
Expand Down
19 changes: 16 additions & 3 deletions tpot/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def get_by_name(opname, operators):
return ret_op_class


def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_score=None, random_state=None):
def export_pipeline(exported_pipeline,
operators, pset,
impute=False, pipeline_score=None,
random_state=None,
data_file_path=''):
"""Generate source code for a TPOT Pipeline.
Parameters
Expand All @@ -62,6 +66,13 @@ def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_s
List of operator classes from operator library
pipeline_score:
Optional pipeline score to be saved to the exported file
impute: bool (False):
If impute = True, then add an imputation step.
random_state: integer
Random seed in train_test_split function.
data_file_path: string (default: '')
By default, the path of the input dataset in the exported code is 'PATH/TO/DATA/FILE'.
If data_file_path is a non-empty string, that path is used instead.
Returns
-------
Expand All @@ -81,14 +92,16 @@ def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_s
pipeline_text += """from sklearn.preprocessing import FunctionTransformer
from copy import copy
"""
if not data_file_path:
data_file_path = 'PATH/TO/DATA/FILE'

pipeline_text += """
# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
tpot_data = pd.read_csv('{}', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state={})
""".format(random_state)
""".format(data_file_path, random_state)

# Add the imputation step if it was used by TPOT
if impute:
Expand Down

0 comments on commit b626271

Please sign in to comment.