2017-10-17 3 views
1

Je souhaite appliquer un classificateur par vote (VotingClassifier) à plusieurs classificateurs définis sous forme de pipelines, puis ajuster les paramètres au moyen d'une recherche sur grille. L'exemple minimal suivant me donne une erreur. Dois-je m'y prendre autrement ?

Utilisation de VotingClassifier dans un Pipeline sklearn

# Minimal reproduction from the question: wrap two single-step pipelines in a
# VotingClassifier, itself wrapped in a pipeline, then list the parameters.
# NOTE(review): `Pipeline` is used below but is not imported in this snippet as
# shown; it needs `from sklearn.pipeline import Pipeline` to run at all.
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.ensemble import VotingClassifier 
p1 = Pipeline([['clf1', RandomForestClassifier()]]) 
p2 = Pipeline([['clf2', AdaBoostClassifier()]]) 
# The estimators are passed here as bare pipelines, without names; this is the
# line the answer below changes.
p3 = Pipeline([['clf3', VotingClassifier(estimators=(p1, p2))]]) 
# This is the call reported to fail with the TypeError quoted below.
p3.get_params() 

Erreur:

TypeError: cannot convert dictionary update sequence element #0 to a sequence 

Répondre

3

Lorsque vous spécifiez les estimateurs pour VotingClassifier, vous devez donner un nom à chacun d'eux, c'est-à-dire les passer sous la forme d'une liste de tuples (nom, estimateur) :

# Corrected version: same nested structure as in the question, but every
# estimator handed to VotingClassifier is given an explicit name.
from sklearn.pipeline import Pipeline 
from sklearn.ensemble import RandomForestClassifier 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.ensemble import VotingClassifier 
p1 = Pipeline([['clf1', RandomForestClassifier()]]) 
p2 = Pipeline([['clf2', AdaBoostClassifier()]]) 
# Each estimator is passed as a (name, estimator) tuple. The names "p1"/"p2"
# become path components of the nested parameter keys shown in the output
# below, e.g. 'clf3__p1__clf1__n_estimators' or 'clf3__p2__clf2__learning_rate'.
p3 = Pipeline([['clf3', VotingClassifier(estimators=[("p1",p1), ("p2",p2)])]]) 
# Returns the dict of parameters, including the nested '<step>__<name>__...'
# keys listed in the output below.
p3.get_params() 

Affichera:

{'clf3': VotingClassifier(estimators=[('p1', Pipeline(steps=[['clf1', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
      max_depth=None, max_features='auto', max_leaf_nodes=None, 
      min_impurity_split=1e-07, min_samples_leaf=1, 
      min_samples_split=2, min_weight_fraction...SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None)]]))], 
      n_jobs=1, voting='hard', weights=None), 
'clf3__estimators': [('p1', 
    Pipeline(steps=[['clf1', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
       max_depth=None, max_features='auto', max_leaf_nodes=None, 
       min_impurity_split=1e-07, min_samples_leaf=1, 
       min_samples_split=2, min_weight_fraction_leaf=0.0, 
       n_estimators=10, n_jobs=1, oob_score=False, random_state=None, 
       verbose=0, warm_start=False)]])), 
    ('p2', 
    Pipeline(steps=[['clf2', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None)]]))], 
'clf3__n_jobs': 1, 
'clf3__p1': Pipeline(steps=[['clf1', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
      max_depth=None, max_features='auto', max_leaf_nodes=None, 
      min_impurity_split=1e-07, min_samples_leaf=1, 
      min_samples_split=2, min_weight_fraction_leaf=0.0, 
      n_estimators=10, n_jobs=1, oob_score=False, random_state=None, 
      verbose=0, warm_start=False)]]), 
'clf3__p1__clf1': RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
      max_depth=None, max_features='auto', max_leaf_nodes=None, 
      min_impurity_split=1e-07, min_samples_leaf=1, 
      min_samples_split=2, min_weight_fraction_leaf=0.0, 
      n_estimators=10, n_jobs=1, oob_score=False, random_state=None, 
      verbose=0, warm_start=False), 
'clf3__p1__clf1__bootstrap': True, 
'clf3__p1__clf1__class_weight': None, 
'clf3__p1__clf1__criterion': 'gini', 
'clf3__p1__clf1__max_depth': None, 
'clf3__p1__clf1__max_features': 'auto', 
'clf3__p1__clf1__max_leaf_nodes': None, 
'clf3__p1__clf1__min_impurity_split': 1e-07, 
'clf3__p1__clf1__min_samples_leaf': 1, 
'clf3__p1__clf1__min_samples_split': 2, 
'clf3__p1__clf1__min_weight_fraction_leaf': 0.0, 
'clf3__p1__clf1__n_estimators': 10, 
'clf3__p1__clf1__n_jobs': 1, 
'clf3__p1__clf1__oob_score': False, 
'clf3__p1__clf1__random_state': None, 
'clf3__p1__clf1__verbose': 0, 
'clf3__p1__clf1__warm_start': False, 
'clf3__p1__steps': [['clf1', 
    RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
       max_depth=None, max_features='auto', max_leaf_nodes=None, 
       min_impurity_split=1e-07, min_samples_leaf=1, 
       min_samples_split=2, min_weight_fraction_leaf=0.0, 
       n_estimators=10, n_jobs=1, oob_score=False, random_state=None, 
       verbose=0, warm_start=False)]], 
'clf3__p2': Pipeline(steps=[['clf2', AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None)]]), 
'clf3__p2__clf2': AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None), 
'clf3__p2__clf2__algorithm': 'SAMME.R', 
'clf3__p2__clf2__base_estimator': None, 
'clf3__p2__clf2__learning_rate': 1.0, 
'clf3__p2__clf2__n_estimators': 50, 
'clf3__p2__clf2__random_state': None, 
'clf3__p2__steps': [['clf2', 
    AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None)]], 
'clf3__voting': 'hard', 
'clf3__weights': None, 
'steps': [['clf3', 
    VotingClassifier(estimators=[('p1', Pipeline(steps=[['clf1', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini', 
       max_depth=None, max_features='auto', max_leaf_nodes=None, 
       min_impurity_split=1e-07, min_samples_leaf=1, 
       min_samples_split=2, min_weight_fraction...SAMME.R', base_estimator=None, 
      learning_rate=1.0, n_estimators=50, random_state=None)]]))], 
      n_jobs=1, voting='hard', weights=None)]]}