Hyperparameter selection taking generalization into account — 9:40

  • Last modified: 2023-02-27 | YouTube

A question of interest is: how should the degree of the polynomial be selected, given that the generalization error is a random quantity? The strategy developed below is to estimate that error by averaging the test MSE over many random train/test splits, and to pick the degree that minimizes the average.
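In notation (added here for clarity, not in the original: $B$ random splits, test set $\mathcal{T}_b$ on split $b$, and $\hat f_d^{(b)}$ the degree-$d$ polynomial fitted on the $b$-th training set):

$$\widehat{\mathrm{MSE}}_{\mathrm{test}}(d) = \frac{1}{B} \sum_{b=1}^{B} \frac{1}{|\mathcal{T}_b|} \sum_{i \in \mathcal{T}_b} \left( y_i - \hat f_d^{(b)}(x_i) \right)^2, \qquad d^{\star} = \arg\min_{d} \widehat{\mathrm{MSE}}_{\mathrm{test}}(d)$$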

Library imports

[1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
[2]:
#
# Function to approximate
# (the data generating process)
#
def f(x):
    return 2 * np.abs(np.sin(x * np.pi / 4 + 0.75)) / (1 + 0.1 * x)


#
# True data.
# (Not available in practice)
#
x_real = np.linspace(0, 10, 100)
x_real = x_real[:, np.newaxis]
y_real = f(x_real)
[3]:
#
# Data sample.
# (The information actually available in practice)
#
rng = np.random.default_rng(12345)

x_sample = np.linspace(0, 10, 100)
rng.shuffle(x_sample)
x_sample = x_sample[:25]
x_sample = np.sort(x_sample)

y_sample = f(x_sample)
X_sample = x_sample[:, np.newaxis]
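As an aside, the repeated random splitting carried out manually in the next cell is exactly what scikit-learn's ShuffleSplit cross-validator implements. A minimal sketch (not part of the original notebook) that estimates the average test MSE of a single pipeline, reusing X_sample, y_sample, and the imports from the first cell; the degree 3 is arbitrary, for illustration only:

from sklearn.model_selection import ShuffleSplit, cross_val_score

# 16 random splits with 5 test points each, mirroring the manual loop below
cv = ShuffleSplit(n_splits=16, test_size=5, random_state=12345)

model = make_pipeline(
    PolynomialFeatures(3, include_bias=False),
    MinMaxScaler(),
    LinearRegression(),
)

# scikit-learn reports negated MSE, so the sign is flipped before averaging
scores = cross_val_score(
    model, X_sample, y_sample, cv=cv, scoring="neg_mean_squared_error"
)
print(-scores.mean())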
[4]:
#
# Compute the average test MSE over 16 random train/test splits
#
plt.figure(figsize=(12, 4))

n_samples = len(x_sample)
n_test = 5
n_train = n_samples - n_test

degrees = list(range(1, 16))

mse_train = []
mse_test = []

for i_degree, degree in enumerate(degrees):

    # Reset the seed so that every degree is evaluated on the same splits
    np.random.seed(12345)

    mse_train_by_sample = []
    mse_test_by_sample = []

    for i_sample in range(16):

        #
        # Random train/test split
        #
        indexes = np.random.choice(
            n_samples,
            n_train,
            replace=False,
        )

        X_sample_train = X_sample[indexes]
        y_sample_train = y_sample[indexes]

        X_sample_test = np.delete(X_sample, indexes, axis=0)
        y_sample_test = np.delete(y_sample, indexes)

        model = make_pipeline(
            PolynomialFeatures(degree, include_bias=False),
            MinMaxScaler(),
            LinearRegression(),
        )
        model.fit(X_sample_train, y_sample_train)

        y_pred_train = model.predict(X_sample_train)
        y_pred_test = model.predict(X_sample_test)

        mse_train_by_sample.append(mean_squared_error(y_sample_train, y_pred_train))
        mse_test_by_sample.append(mean_squared_error(y_sample_test, y_pred_test))

    mse_train.append(np.mean(mse_train_by_sample))
    mse_test.append(np.mean(mse_test_by_sample))

plt.plot(degrees, mse_train, color="tab:blue", linewidth=2, label="fit")
plt.plot(degrees, mse_test, color="tab:orange", linewidth=2, label="test")

plt.yscale("log")


plt.xlabel("Degree")
plt.ylabel("MSE")
plt.grid()
plt.legend()
plt.gca().spines["left"].set_color("gray")
plt.gca().spines["bottom"].set_color("gray")
plt.gca().spines["top"].set_visible(False)
plt.gca().spines["right"].set_visible(False)


plt.show()
[Figure: average train and test MSE as a function of the polynomial degree]
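The degree used in the next cell can also be read off programmatically rather than by eye. A small follow-up (not in the original notebook), reusing the degrees and mse_test lists computed above, which is presumably how the optimal_degree = 8 below was chosen:

# Degree with the smallest average test MSE
optimal_degree = degrees[int(np.argmin(mse_test))]
print(optimal_degree)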
[5]:
plt.figure(figsize=(12, 13))

optimal_degree = 8

n_samples = len(x_sample)
n_test = 5
n_train = n_samples - n_test

for i_plot in range(16):

    plt.subplot(4, 4, i_plot + 1)

    #
    # Datos
    #
    plt.plot(x_real, y_real, "--", color="black", alpha=1.0, zorder=10)
    plt.plot(x_sample, y_sample, "o", color="black", alpha=1.0, zorder=10)

    #
    # Random train/test split
    #
    indexes = np.random.choice(
        n_samples,
        n_train,
        replace=False,
    )

    X_sample_train = X_sample[indexes]
    y_sample_train = y_sample[indexes]

    X_sample_test = np.delete(X_sample, indexes)
    y_sample_test = np.delete(y_sample, indexes)

    #
    # Model (same pipeline as above, with the selected degree)
    #
    model = make_pipeline(
        PolynomialFeatures(optimal_degree, include_bias=False),
        MinMaxScaler(),
        LinearRegression(),
    )
    model.fit(X_sample_train, y_sample_train)
    y_predicted = model.predict(x_real)

    plt.plot(x_real, y_predicted, color="tab:blue", linewidth=3, zorder=2, alpha=0.8)

    plt.plot(
        X_sample_test,
        y_sample_test,
        "o",
        color="black",
        fillstyle="none",
        markersize=11,
    )

    plt.xticks([], [])
    plt.yticks([], [])

    plt.gca().spines["left"].set_color("gray")
    plt.gca().spines["bottom"].set_color("gray")
    plt.gca().spines["top"].set_visible(False)
    plt.gca().spines["right"].set_visible(False)


plt.show()
[Figure: sixteen fits of the degree-8 model, one per random train/test split; hollow circles mark the test points of each split]
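For completeness, the entire selection loop can be delegated to scikit-learn. A minimal sketch (again, not part of the original notebook) that searches the same degree grid under the same kind of random splits; the parameter name polynomialfeatures__degree is the one make_pipeline assigns automatically to the PolynomialFeatures step:

from sklearn.model_selection import GridSearchCV, ShuffleSplit

pipeline = make_pipeline(
    PolynomialFeatures(include_bias=False),
    MinMaxScaler(),
    LinearRegression(),
)

search = GridSearchCV(
    pipeline,
    param_grid={"polynomialfeatures__degree": list(range(1, 16))},
    scoring="neg_mean_squared_error",
    cv=ShuffleSplit(n_splits=16, test_size=5, random_state=12345),
)
search.fit(X_sample, y_sample)
print(search.best_params_)

Note that the reported best degree can vary with the random_state, which is precisely the point of this notebook: the generalization error, and hence the selected hyperparameter, is a random quantity.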