Eliminación recursiva de características usando cross-validation (RFECV)#

  • Última modificación: 2023-03-11 | YouTube

[1]:
from sklearn.datasets import make_friedman1

# Friedman #1 regression problem: the target depends only on the first
# 5 features; the remaining features are pure noise, which makes this a
# natural benchmark for feature-selection methods.
dataset_params = {
    "n_samples": 50,
    "n_features": 10,
    "random_state": 0,  # fixed seed for reproducibility
}
X, y = make_friedman1(**dataset_params)

X.shape
[1]:
(50, 10)
[2]:
from sklearn.feature_selection import RFECV

from sklearn.svm import SVR

# A linear-kernel SVR exposes coefficient-based feature importances,
# which RFECV needs in order to rank and eliminate features.
estimator = SVR(kernel="linear")

selector = RFECV(
    # Supervised estimator whose fit method provides feature-importance
    # information.
    estimator=estimator,
    # Number of features removed per iteration when >= 1; a value in
    # (0.0, 1.0) is instead the fraction (rounded down) removed each step.
    step=1,
    # Lower bound on the number of surviving features; this count is
    # always scored even when (n_features - min) is not divisible by step.
    min_features_to_select=1,
    # Cross-validation splitting strategy (integer -> k-fold).
    cv=5,
    # Scorer name or callable; None falls back to the estimator's score.
    scoring=None,
    # Verbosity of the output.
    verbose=0,
    # Cores used in parallel while fitting across folds.
    n_jobs=None,
)

selector = selector.fit(X, y)

X_new = selector.transform(X)
X_new.shape
[2]:
(50, 5)
[3]:
#
# The mask of selected features: support_[i] is True when the i-th
# feature survived the recursive elimination.
#
selector.support_
[3]:
array([ True,  True,  True,  True,  True, False, False, False, False,
       False])
[4]:
#
# The feature ranking, such that ranking_[i] corresponds to the ranking
# position of the i-th feature. Selected (i.e., estimated best) features are
# assigned rank 1; higher ranks were eliminated earlier.
#
selector.ranking_
[4]:
array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
[5]:
#
# The fitted estimator used to select features (here, the linear-kernel
# SVR refit on the reduced feature subset).
#
selector.estimator_
[5]:
SVR(kernel='linear')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.