'Parallel' object is not iterable #73

Open
ggous opened this issue Sep 27, 2022 · 0 comments
ggous commented Sep 27, 2022

Hello and thanks for this project! It seems very promising!

I am trying to train an XGBoost classifier.

My code is:


import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedKFold
from scipy.stats import uniform
from mango.domain.distribution import loguniform
from mango import Tuner
from joblib import Parallel, delayed

xgb_params = {
    'n_estimators': range(10, 200, 50), # 10 to 200 in steps of 50
    'max_depth': range(1, 15), # 1 to 14
    'reg_alpha': loguniform(-3, 6),  # 10^-3 to 10^3
    'booster': ['gbtree', 'gblinear'],
    'colsample_bylevel': uniform(0.05, 0.95), # 0.05 to 1.0
    'colsample_bytree': uniform(0.05, 0.95), # 0.05 to 1.0
    'learning_rate': loguniform(-3, 3),  # 0.001 to 1
    'reg_lambda': loguniform(-3, 6),  # 10^-3 to 10^3
    'min_child_weight': loguniform(0, 2), # 1 to 100
    'subsample': uniform(0.1, 0.89), # 0.1 to 0.99
}
 
class MangoParallelOptimization:
    def __init__(self,
                 njobs, 
                 configuration_params,
                 features_train,
                 target_train,
                 features_val,
                 target_val):
        self.njobs = njobs
        self.conf_dict = configuration_params
        self.x_train = features_train
        self.y_train = target_train
        self.x_val = features_val
        self.y_val = target_val
        self.space = xgb_params
        
        
    def _objective(self, **model_params):
        kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
        results = []
        for hyper_param in model_params:
            model = xgb.XGBClassifier(**hyper_param)
                                      
            result = cross_val_score(model, 
                                     self.x_train,
                                     self.y_train,
                                     scoring='accuracy',
                                     cv=kfold).mean()
            results.append(result)
        return results
        
           
    def _objective2(self, params_batch):
        global parameters
        results_batch = Parallel(self.njobs,
                                 backend='multiprocessing')
        (delayed(self._objective)(**params) for params in params_batch)
        acc = [result for result in results_batch]
        return acc
    
    def mango_optimization(self):
        tuner = Tuner(self.space, self._objective2, self.conf_dict)
        optimization_results = tuner.maximize()
        return optimization_results['best_params'], optimization_results['best_objective']
    
    
    
if __name__=="__main__":
    df = pd.read_csv('/home/ggous/example.csv')
    df.dropna(axis=1, inplace=True)
    features = df.drop(['id', 'CLASS'], axis=1)
    labels = df['CLASS'].values
    
    # Split the data into training and testing sets
    x_train, x_test, y_train, y_test = train_test_split(
        features,
        labels,
        stratify=labels,
        test_size = 0.2,
        random_state = 123)
    
    # encode string class values as integers
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)

    scaler = StandardScaler()
    x_train_sc = pd.DataFrame(scaler.fit_transform(x_train),
                              index=x_train.index, 
                              columns=x_train.columns)
    x_test_sc = scaler.transform(x_test)

    # Parallel optimization with Mango
    config_params = {'num_iteration': 40, 'initial_random': 10}
    optim = MangoParallelOptimization(njobs=4,
                                      configuration_params=config_params,
                                      features_train=x_train,
                                      target_train=y_train,
                                      features_val=x_test,
                                      target_val=y_test)
     
    best_parameters, best_objective = optim.mango_optimization()

    # Results
    print('best parameters:', best_parameters)
    print('best accuracy:', best_objective)
    # Train the model with the best hyper-parameters 
    best_model = xgb.XGBClassifier(n_jobs=-1, **best_parameters)
    best_model.fit(x_train, y_train)

The file I am using is here.

I have some questions:

  1. First of all, running the code gives: 'Parallel' object is not iterable
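
Looking at it again, I suspect the problem is in _objective2: I build the Parallel object but never actually call it with the generator, so results_batch is the Parallel instance itself. If I understand the joblib docs correctly, the two have to be combined into a single call, something like this sketch (not tested):

def _objective2(self, params_batch):
    # Calling the Parallel instance with the generator of delayed tasks
    # runs them and returns a list with one result per parameter set.
    results_batch = Parallel(n_jobs=self.njobs, backend='multiprocessing')(
        delayed(self._objective)(**params) for params in params_batch
    )
    return list(results_batch)

Is that the right fix?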

  2. If I want to use the following arguments in the XGB classifier:

'use_label_encoder': False,
'eval_metric': 'mlogloss',
'seed': 123,
'enable_categorical': False

Can I do this?

for hyper_param in model_params:
    model = xgb.XGBClassifier(**hyper_param,
                              use_label_encoder=False,
                              eval_metric='mlogloss',
                              seed=123,
                              enable_categorical=False)
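
Or would it be cleaner to keep the fixed arguments in a separate dict and merge them? Something like this (fixed_params is just a name I made up):

fixed_params = {
    'use_label_encoder': False,
    'eval_metric': 'mlogloss',
    'seed': 123,
    'enable_categorical': False,
}

# The fixed dict and the sampled hyperparameters have no overlapping
# keys, so unpacking both into the constructor should be safe.
model = xgb.XGBClassifier(**fixed_params, **hyper_param)

Would both forms behave the same?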
  3. If I want to do the k-fold manually, like this:
def _objective(self, **model_params):
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    for i, (train_idx, val_idx) in enumerate(kfold.split(x_train, y_train)):
        x_train_, y_train_ = x_train[train_idx, :], y_train[train_idx]
        x_val_, y_val_ = x_train[val_idx, :], y_train[val_idx]

        model = xgb.XGBClassifier(**model_params)

        history = model.fit(x_train_,
                            y_train_,
                            early_stopping_rounds=10,
                            eval_set=[(x_train_, y_train_), (x_val_, y_val_)])

        ....

How can I do that, and how can I use the history object inside every fold iteration in order to plot things?
And finally, how do I return the result that Mango wants? What kind of result should it be?
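
To make question 3 concrete, here is a rough sketch of what I am aiming for, assuming the tuner only needs one scalar per parameter set (the mean validation accuracy) and that _objective2 collects those scalars into a list:

from sklearn.metrics import accuracy_score

def _objective(self, **model_params):
    kfold = StratifiedKFold(n_splits=2, shuffle=True, random_state=123)
    # Convert once so positional indexing works for DataFrames too.
    X, y = np.asarray(self.x_train), np.asarray(self.y_train)
    fold_scores = []
    for train_idx, val_idx in kfold.split(X, y):
        x_tr, y_tr = X[train_idx], y[train_idx]
        x_va, y_va = X[val_idx], y[val_idx]

        model = xgb.XGBClassifier(**model_params)
        model.fit(x_tr, y_tr,
                  early_stopping_rounds=10,
                  eval_set=[(x_tr, y_tr), (x_va, y_va)],
                  verbose=False)

        # model.evals_result() holds the per-round metrics for each eval
        # set, so I could stash it here and plot it per fold later.
        fold_scores.append(accuracy_score(y_va, model.predict(x_va)))

    # One number per parameter set; _objective2 gathers these into the
    # list of scores that the tuner maximizes.
    return np.mean(fold_scores)

Is a list of such scalars, one per element of params_batch, what Tuner expects back from the batch objective?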
