Skip to content

Commit

Permalink
Merge pull request #860 from EpistasisLab/development
Browse files Browse the repository at this point in the history
TPOT 10.0.1
  • Loading branch information
weixuanfu authored Apr 19, 2019
2 parents 75dc2a0 + 43b30f6 commit b626271
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ build: false

environment:
matrix:
- PYTHON_VERSION: 3.6
- PYTHON_VERSION: 3.7
MINICONDA: C:/Miniconda36-x64
- PYTHON_VERSION: 2.7
MINICONDA: C:/Miniconda-x64
Expand Down
33 changes: 25 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,29 @@
language: python
virtualenv:
system_site_packages: true
env:
matrix:
# let's start simple:
- PYTHON_VERSION="2.7" LATEST="true"
- PYTHON_VERSION="3.6" COVERAGE="true" LATEST="true"
- PYTHON_VERSION="3.6" LATEST="true"
matrix:
# let's start simple:
include:
- name: "Python 3.7 on Xenial Linux"
dist: xenial # required for Python >= 3.7
env: PYTHON_VERSION="3.7"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 3.7 on Xenial Linux with coverage"
dist: xenial # required for Python >= 3.7
env: PYTHON_VERSION="3.7" COVERAGE="true"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 2.7 on Xenial Linux"
dist: xenial
env: PYTHON_VERSION="2.7"
before_install:
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- name: "Python 3.7 on macOS"
os: osx
osx_image: xcode10.2 # Python 3.7.2 running on macOS 10.14.3
language: shell # 'language: python' is an error on Travis CI macOS
env: PYTHON_VERSION="3.7"
before_install:
- wget https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
install: source ./ci/.travis_install.sh
script: bash ./ci/.travis_test.sh
after_success:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Development status: [![Development Build Status - Mac/Linux](https://travis-ci.o
[![Development Coverage Status](https://coveralls.io/repos/github/EpistasisLab/tpot/badge.svg?branch=development)](https://coveralls.io/github/EpistasisLab/tpot?branch=development)

Package information: [![Python 2.7](https://img.shields.io/badge/python-2.7-blue.svg)](https://www.python.org/download/releases/2.7/)
[![Python 3.6](https://img.shields.io/badge/python-3.6-blue.svg)](https://www.python.org/downloads/release/python-360/)
[![Python 3.7](https://img.shields.io/badge/python-3.7-blue.svg)](https://www.python.org/downloads/release/python-370/)
[![License: LGPL v3](https://img.shields.io/badge/license-LGPL%20v3-blue.svg)](http://www.gnu.org/licenses/lgpl-3.0)
[![PyPI version](https://badge.fury.io/py/TPOT.svg)](https://badge.fury.io/py/TPOT)

Expand Down
29 changes: 8 additions & 21 deletions ci/.travis_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,24 @@ export CXX=g++

# Deactivate the travis-provided virtual environment and setup a
# conda-based environment instead
deactivate
# deactivate

# Use the miniconda installer for faster download / install of conda
# itself
wget https://repo.continuum.io/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh \
-O miniconda.sh
chmod +x miniconda.sh && ./miniconda.sh -b
export PATH=/home/travis/miniconda3/bin:$PATH

chmod +x miniconda.sh && ./miniconda.sh -b -p $HOME/miniconda
export PATH="$HOME/miniconda/bin:$PATH"
conda update --yes conda

# Configure the conda environment and put it in the path using the
# provided versions
if [[ "$LATEST" == "true" ]]; then
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy scipy scikit-learn cython pandas
else
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
scikit-learn=$SKLEARN_VERSION \
cython \
pandas
fi

source activate testenv
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy scipy scikit-learn cython pandas

if [[ "$LATEST" == "true" ]]; then
pip install deap
else
pip install deap==$DEAP_VERSION
fi
source activate testenv

pip install deap
pip install update_checker
pip install tqdm
pip install stopit
Expand Down
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def calculate_version():
Contact
=============
If you have any questions or comments about TPOT, please feel free to contact me via:
If you have any questions or comments about TPOT, please feel free to contact us via:
E-mail: rso@randalolson.com
E-mail: ttle@pennmedicine.upenn.edu or weixuanf@pennmedicine.upenn.edu
or Twitter: https://twitter.com/randal_olson
or Twitter: https://twitter.com/trang1618 or https://twitter.com/WeixuanFu
This project is hosted at https://github.com/EpistasisLab/tpot
''',
Expand Down Expand Up @@ -60,6 +60,7 @@ def calculate_version():
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Topic :: Scientific/Engineering :: Artificial Intelligence'
],
keywords=['pipeline optimization', 'hyperparameter optimization', 'data science', 'machine learning', 'genetic programming', 'evolutionary computation'],
Expand Down
33 changes: 33 additions & 0 deletions tests/export_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,39 @@ def test_export_pipeline_5():
assert expected_code == export_pipeline(pipeline, tpot_obj_reg.operators, tpot_obj_reg._pset)


def test_export_pipeline_6():
    """Assert that export_pipeline() generates compilable source code containing the given random_state and data_file_path.

    Builds a one-operator pipeline (KNeighborsClassifier), exports it with
    random_state=42 and data_file_path='test_path', and checks the generated
    source matches the expected template exactly — i.e. the custom data path
    is substituted into pd.read_csv and the seed into train_test_split.
    """

    # DEAP primitive-tree string form of the pipeline: a single
    # KNeighborsClassifier with fixed hyperparameters.
    pipeline_string = (
        'KNeighborsClassifier('
        'input_matrix, '
        'KNeighborsClassifier__n_neighbors=10, '
        'KNeighborsClassifier__p=1, '
        'KNeighborsClassifier__weights=uniform'
        ')'
    )
    pipeline = creator.Individual.from_string(pipeline_string, tpot_obj._pset)
    # Expected output is compared byte-for-byte, so the template below must
    # stay exactly in sync with export_pipeline()'s generated code.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('test_path', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'].values, random_state=42)

exported_pipeline = KNeighborsClassifier(n_neighbors=10, p=1, weights="uniform")

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    assert expected_code == export_pipeline(pipeline, tpot_obj.operators,
                                            tpot_obj._pset, random_state=42,
                                            data_file_path='test_path')


def test_operator_export():
"""Assert that a TPOT operator can export properly with a callable function as a parameter."""
assert list(TPOTSelectPercentile.arg_types) == TPOTSelectPercentile_args
Expand Down
2 changes: 1 addition & 1 deletion tpot/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@
"""

__version__ = '0.10.0'
__version__ = '0.10.1'
8 changes: 6 additions & 2 deletions tpot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,13 +1083,16 @@ def _create_periodic_checkpoint_folder(self):
raise ValueError('Failed creating the periodic_checkpoint_folder:\n{}'.format(e))


def export(self, output_file_name):
def export(self, output_file_name, data_file_path=''):
"""Export the optimized pipeline as Python code.
Parameters
----------
output_file_name: string
String containing the path and file name of the desired output file
data_file_path: string (default: '')
By default, the path of the input dataset in the exported code is 'PATH/TO/DATA/FILE'.
If data_file_path is a non-empty string, that path is used instead.
Returns
-------
Expand All @@ -1103,7 +1106,8 @@ def export(self, output_file_name):
to_write = export_pipeline(self._optimized_pipeline,
self.operators, self._pset,
self._imputed, self._optimized_pipeline_score,
self.random_state)
self.random_state,
data_file_path=data_file_path)

with open(output_file_name, 'w') as output_file:
output_file.write(to_write)
Expand Down
19 changes: 16 additions & 3 deletions tpot/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,11 @@ def get_by_name(opname, operators):
return ret_op_class


def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_score=None, random_state=None):
def export_pipeline(exported_pipeline,
operators, pset,
impute=False, pipeline_score=None,
random_state=None,
data_file_path=''):
"""Generate source code for a TPOT Pipeline.
Parameters
Expand All @@ -62,6 +66,13 @@ def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_s
List of operator classes from operator library
pipeline_score:
Optional pipeline score to be saved to the exported file
impute: bool (False):
If impute = True, then add an imputation step.
random_state: integer
Random seed in train_test_split function.
data_file_path: string (default: '')
By default, the path of the input dataset in the exported code is 'PATH/TO/DATA/FILE'.
If data_file_path is a non-empty string, that path is used instead.
Returns
-------
Expand All @@ -81,14 +92,16 @@ def export_pipeline(exported_pipeline, operators, pset, impute=False, pipeline_s
pipeline_text += """from sklearn.preprocessing import FunctionTransformer
from copy import copy
"""
if not data_file_path:
data_file_path = 'PATH/TO/DATA/FILE'

pipeline_text += """
# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
tpot_data = pd.read_csv('{}', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \\
train_test_split(features, tpot_data['target'].values, random_state={})
""".format(random_state)
""".format(data_file_path, random_state)

# Add the imputation step if it was used by TPOT
if impute:
Expand Down

0 comments on commit b626271

Please sign in to comment.