DummyClassifier
Implementa modelos ingenuos que sirven como línea base de referencia para la evaluación de modelos de clasificación.
[1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
#
# Build an imbalanced binary dataset from iris: samples of class 1 keep
# their label and every other class is relabelled in place as -1.
#
X, y = load_iris(return_X_y=True)
y[y != 1] = -1

# No split size is passed, so train_test_split uses its default proportions;
# random_state=0 keeps the partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
[2]:
#
# Fit a support vector classifier with a linear kernel on the training
# split and score it on the held-out test split.
#
from sklearn.svm import SVC

clf = SVC(kernel="linear", C=1)
clf.fit(X_train, y_train)
# Left as the last expression so the notebook displays the score.
clf.score(X_test, y_test)
[2]:
0.631578947368421
[3]:
#
# Build a reference classifier that always predicts the most frequent class,
# fit it on the training split and score it on the test split.
#
from sklearn.dummy import DummyClassifier

dummy_clf = DummyClassifier(
    # ---------------------------------------------------------------------
    # strategy: how predictions are generated.
    #   * "most_frequent": predict always returns the most frequent class
    #     label found in the y argument passed to fit.
    #   * "prior": predict always returns the most frequent class label
    #     found in the y argument passed to fit (like "most_frequent").
    #   * "stratified": predict_proba randomly samples one-hot vectors from
    #     a multinomial distribution parametrized by the empirical class
    #     prior probabilities.
    #   * "uniform": predictions are drawn uniformly at random from the
    #     unique classes observed in y, so every class is equally likely.
    #   * "constant": always predicts a user-supplied constant label;
    #     useful for metrics that evaluate a non-majority class.
    strategy="most_frequent",
    # ---------------------------------------------------------------------
    # random_state: controls the randomness of the predictions when
    # strategy is "stratified" or "uniform".
    random_state=0,
    # ---------------------------------------------------------------------
    # constant: the explicit label predicted by the "constant" strategy;
    # only meaningful for that strategy.
    constant=None,
).fit(X_train, y_train)
# Left as the last expression so the notebook displays the score.
dummy_clf.score(X_test, y_test)
[3]:
0.5789473684210527
[4]:
# Refit the support vector classifier with an RBF kernel (same C) and score
# it on the test split; this rebinds clf, replacing the previous model.
clf = SVC(kernel="rbf", C=1)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
[4]:
0.9473684210526315