DummyClassifier

  • Implementa clasificadores ingenuos (de referencia) que sirven como línea base para evaluar modelos de clasificación.

[1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

#
# Build an imbalanced binary dataset from iris
#
X, y = load_iris(return_X_y=True)

# Collapse every label other than 1 into -1 (in place), so the original
# multi-class target becomes a two-class problem: 1 versus -1.
y[y != 1] = -1

# Fixed seed so the train/test partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
[2]:
#
# Reference model: support vector classifier with a linear kernel
#
from sklearn.svm import SVC

clf = SVC(kernel="linear", C=1)
clf.fit(X_train, y_train)

# Mean accuracy on the held-out split; left as the last expression so the
# notebook displays it.
clf.score(X_test, y_test)
[2]:
0.631578947368421
[3]:
#
# Baseline classifier that always predicts the most frequent class
#
from sklearn.dummy import DummyClassifier

# DummyClassifier parameters used below
# -----------------------------------------------------------------------------
# strategy: how predictions are generated.
#   * "most_frequent": predict always returns the most frequent class label
#     in the observed y argument passed to fit.
#   * "prior": predict behaves like "most_frequent"; per the scikit-learn
#     documentation, predict_proba returns the empirical class distribution
#     of y instead.
#   * "stratified": predict_proba randomly samples one-hot vectors from a
#     multinomial distribution parametrized by the empirical class prior
#     probabilities.
#   * "uniform": predictions are drawn uniformly at random from the unique
#     classes observed in y, i.e. each class has equal probability.
#   * "constant": always predicts a user-provided constant label. Useful for
#     metrics that evaluate a non-majority class.
#
# random_state: controls the randomness of the predictions when
#   strategy="stratified" or strategy="uniform".
#
# constant: the explicit label predicted by the "constant" strategy; only
#   useful for that strategy.
# -----------------------------------------------------------------------------
dummy_clf = DummyClassifier(
    strategy="most_frequent",
    random_state=0,
    constant=None,
).fit(X_train, y_train)

# Accuracy of the naive baseline on the held-out split; any real model should
# be compared against this number.
dummy_clf.score(X_test, y_test)
[3]:
0.5789473684210527
[4]:
# Refit the support vector classifier with an RBF kernel (same C) and report
# its accuracy on the held-out split.
clf = SVC(kernel="rbf", C=1)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
[4]:
0.9473684210526315