La función make_classification — 5:02 min

  • 5:02 min | Última modificación: Septiembre 27, 2021 | YouTube

https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_classification.html#sklearn.datasets.make_classification

Generación aleatoria de un problema de clasificación de n clases.

[1]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

X, y = make_classification(
    # Sample count.
    n_samples=100,
    # Total feature count: informative + redundant + repeated features, with
    # whatever remains filled by useless features drawn at random.
    n_features=2,
    # How many of the features are informative.
    n_informative=2,
    # How many of the features are redundant.
    n_redundant=0,
    # How many features are duplicates, drawn randomly from the informative
    # and redundant ones.
    n_repeated=0,
    # Number of classes (labels) in the generated problem.
    n_classes=2,
    # Number of clusters that make up each class.
    n_clusters_per_class=1,
    # Multiplier for the hypercube size; larger values spread the
    # clusters/classes apart and make the task easier.
    class_sep=1.5,
    # Whether to shuffle the samples; disabled here.
    shuffle=False,
    # Seed controlling the random number generation for the dataset.
    random_state=12346,
)

# Plot each class as its own scatter series on a square 8x8 figure.
plt.figure(figsize=(8, 8))
axes = plt.gca()
for label, color in ((0, "tab:red"), (1, "tab:blue")):
    mask = y == label
    axes.scatter(
        X[mask, 0],
        X[mask, 1],
        color=color,
        edgecolors="k",
        s=120,
        alpha=0.9,
    )

# Gray out the left/bottom spines and hide the top/right ones.
for side in ("left", "bottom"):
    axes.spines[side].set_color("gray")
for side in ("top", "right"):
    axes.spines[side].set_visible(False)
plt.show()
../_images/53_datasets_21_make_classification_3_0.png
[2]:
# Two-class, two-feature problem where each class is made of two clusters
# and the class separation factor is 4.0.
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    class_sep=4.0,
    shuffle=False,
    random_state=12346,
)

# Plot each class as its own scatter series on a square 8x8 figure.
plt.figure(figsize=(8, 8))
axes = plt.gca()
for label, color in ((0, "tab:red"), (1, "tab:blue")):
    mask = y == label
    axes.scatter(
        X[mask, 0],
        X[mask, 1],
        color=color,
        edgecolors="k",
        s=120,
        alpha=0.9,
    )

# Gray out the left/bottom spines and hide the top/right ones.
for side in ("left", "bottom"):
    axes.spines[side].set_color("gray")
for side in ("top", "right"):
    axes.spines[side].set_visible(False)
plt.show()
../_images/53_datasets_21_make_classification_4_0.png