Skip to content

Commit

Permalink
update for new sklearn version
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhao062@gmail.com authored and yzhao062@gmail.com committed Jul 16, 2023
1 parent 2308737 commit 7dcc90c
Show file tree
Hide file tree
Showing 15 changed files with 73 additions and 45 deletions.
4 changes: 3 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@ include README.rst
include requirements.txt
include suod/models
include suod/models/saved_models/bps_prediction.joblib
include suod/models/saved_models/bps_train.joblib
include suod/models/saved_models/bps_train.joblib
include suod/models/saved_models/bps_prediction_old.joblib
include suod/models/saved_models/bps_train_old.joblib
39 changes: 15 additions & 24 deletions examples/module_examples/M3_BPS/demo_balance_scheduling_full.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,27 @@
import sys
import time
import warnings
import numpy as np
import scipy as sp
from scipy.stats import rankdata
from sklearn.base import clone
import joblib

import arff
import joblib
import numpy as np
from scipy.stats import rankdata
from joblib import effective_n_jobs
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted

from sklearn.preprocessing import StandardScaler
from joblib import effective_n_jobs
import scipy as sp
from joblib import Parallel, delayed
from copy import deepcopy
import arff

from pyod.utils.utility import score_to_label
from joblib import load
from pyod.models.iforest import IForest
from joblib import effective_n_jobs
from pyod.models.abod import ABOD
from pyod.models.cblof import CBLOF
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.mcd import MCD
from pyod.models.lscp import LSCP
from scipy.stats import rankdata
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_array

if not sys.warnoptions:
warnings.simplefilter("ignore")
Expand Down Expand Up @@ -147,7 +137,7 @@ def _partition_estimators(n_estimators, n_jobs):

# Partition estimators between jobs
n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs,
dtype=np.int)
dtype=int)
n_estimators_per_job[:n_estimators % n_jobs] += 1
starts = np.cumsum(n_estimators_per_job)

Expand Down Expand Up @@ -317,6 +307,7 @@ def cost_forecast_meta(clf, X, base_estimator_names):
base_estimator_names.append(idx_clf_mapping[i])

this_directory = os.path.abspath(os.path.dirname(__file__))

cost_forecast_loc_fit_ = os.path.join(
this_directory, 'saved_models', 'bps_train.joblib')

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def _partition_estimators(n_estimators, n_jobs):

# Partition estimators between jobs
n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs,
dtype=np.int)
dtype=int)
n_estimators_per_job[:n_estimators % n_jobs] += 1
starts = np.cumsum(n_estimators_per_job)

Expand Down
12 changes: 9 additions & 3 deletions suod/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from suod.models.parallel_processes import _parallel_decision_function
from suod.models.parallel_processes import _partition_estimators
from suod.models.parallel_processes import _parallel_approx_estimators
from ..utils.utility import _unfold_parallel, build_codes
from ..utils.utility import _unfold_parallel, build_codes, _get_sklearn_version

import warnings
from collections import defaultdict
Expand Down Expand Up @@ -216,8 +216,14 @@ def _parameter_validation(self, contamination, n_jobs, rp_clf_list,

# validate the trained model
if cost_forecast_loc_fit is None:
self.cost_forecast_loc_fit_ = os.path.join(
this_directory, 'saved_models', 'bps_train.joblib')

sklearn_version = _get_sklearn_version()
if sklearn_version[:3] >= '1.3':
self.cost_forecast_loc_fit_ = os.path.join(
this_directory, 'saved_models', 'bps_train.joblib')
else:
self.cost_forecast_loc_fit_ = os.path.join(
this_directory, 'saved_models', 'bps_train_old.joblib')
else:
self.cost_forecast_loc_fit_ = cost_forecast_loc_fit

Expand Down
2 changes: 1 addition & 1 deletion suod/models/parallel_processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def _partition_estimators(n_estimators, n_jobs, verbose=False):

# Partition estimators between jobs
n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs,
dtype=np.int)
dtype=int)
n_estimators_per_job[:n_estimators % n_jobs] += 1
starts = np.cumsum(n_estimators_per_job)

Expand Down
Binary file modified suod/models/saved_models/bps_prediction.joblib
Binary file not shown.
Binary file not shown.
Binary file modified suod/models/saved_models/bps_train.joblib
Binary file not shown.
Binary file added suod/models/saved_models/bps_train_old.joblib
Binary file not shown.
Binary file modified suod/test/bps_prediction.joblib
Binary file not shown.
Binary file added suod/test/bps_prediction_old.joblib
Binary file not shown.
Binary file modified suod/test/bps_train.joblib
Binary file not shown.
Binary file added suod/test/bps_train_old.joblib
Binary file not shown.
17 changes: 13 additions & 4 deletions suod/test/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from suod.models.base import SUOD
from suod.utils.utility import _get_sklearn_version
from pyod.utils.data import generate_data
from pyod.models.lof import LOF
from pyod.models.pca import PCA
Expand Down Expand Up @@ -46,11 +47,19 @@ def setUp(self):

this_directory = os.path.abspath(os.path.dirname(__file__))

self.cost_forecast_loc_fit_ = os.path.join(this_directory,
'bps_train.joblib')
sklearn_version = _get_sklearn_version()
if sklearn_version[:3] >= '1.3':
self.cost_forecast_loc_fit_ = os.path.join(this_directory,
'bps_train.joblib')

self.cost_forecast_loc_pred_ = os.path.join(this_directory,
'bps_prediction.joblib')
self.cost_forecast_loc_pred_ = os.path.join(this_directory,
'bps_prediction.joblib')
else:
self.cost_forecast_loc_fit_ = os.path.join(this_directory,
'bps_train_old.joblib')

self.cost_forecast_loc_pred_ = os.path.join(this_directory,
'bps_prediction_old.joblib')

self.model = SUOD(base_estimators=self.base_estimators, n_jobs=2,
rp_flag_global=True, bps_flag=True,
Expand Down
42 changes: 31 additions & 11 deletions suod/utils/utility.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
# Author: Yue Zhao <zhaoy@cmu.edu>
# License: MIT
import numpy as np
from scipy.special import erf
from sklearn.preprocessing import MinMaxScaler
# suppress warnings
import warnings

import numpy as np
import sklearn
from pyod.models.abod import ABOD
from pyod.models.hbos import HBOS
from pyod.models.iforest import IForest
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.lscp import LSCP
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.pca import PCA
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.mcd import MCD
from pyod.models.lscp import LSCP

# suppress warnings
import warnings
from scipy.special import erf
from sklearn.preprocessing import MinMaxScaler

clf_idx_mapping = {
'ABOD': 1,
Expand Down Expand Up @@ -168,6 +168,26 @@ def raw_score_to_proba(decision_scores, test_scores, method='linear'):
method, 'is not a valid probability conversion method')


def _get_sklearn_version():  # pragma: no cover
    """Report the version of the scikit-learn package that is imported.

    Returns
    -------
    sklearn_version : str
        The value of ``sklearn.__version__`` passed through ``str``,
        e.g. ``'1.3.0'``.

    Notes
    -----
    The result is a plain string, so comparing it (or a prefix of it)
    with ``>=`` is lexicographic: ``'1.10'[:3]`` is ``'1.1'``, which
    sorts below ``'1.3'``. Split on ``'.'`` and compare the numeric
    components when version ordering matters.
    """
    return str(sklearn.__version__)


def get_estimators_small(contamination=0.1):
"""Internal method to create a list of 600 base outlier detectors.
Expand Down

0 comments on commit 7dcc90c

Please sign in to comment.