Introducción al aprendizaje estadístico

  • Última modificación: 2024-01-22 | YouTube

Datasets

[1]:
#
# Example of a dataset that ships with sklearn
#
from sklearn import datasets

iris = datasets.load_iris()
data = iris.data

# The expected layout is (n_samples, n_features); data that does not
# come in this layout must be preprocessed first
data.shape
[1]:
(150, 4)
[2]:
#
# Load the digits dataset and display its last image
#
import matplotlib.pyplot as plt

digits = datasets.load_digits()

# digits.images stores one 2-D image per sample, so it is not yet in the
# (n_samples, n_features) layout. Inspect digits.images.shape in a cell of
# its own: only the last expression of a cell is displayed.
plt.imshow(
    digits.images[-1],
    cmap=plt.cm.gray_r,  # reversed grayscale colormap
)
[2]:
<matplotlib.image.AxesImage at 0x13caf4670>
../_images/02_tutoriales_basicos_02_aprendizaje_estadistico_3_1.png
[3]:
# Flatten every image into a single row so the array follows the
# (n_samples, n_features) layout
data = digits.images.reshape(len(digits.images), -1)
data.shape
[3]:
(1797, 64)

Estimadores

[4]:
#
# Parameters and estimation
#
from sklearn import linear_model

# Options are passed to the constructor; fit() then estimates the model
# parameters from the samples and their targets
estimator = linear_model.LinearRegression(fit_intercept=True)
estimator.fit(X=data, y=digits.target)
[4]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
[5]:
#
# Estimated parameters: one coefficient per input feature
#
estimator.coef_
[5]:
array([-5.75781288e-16,  8.57923376e-02, -1.00949621e-02, -2.83075186e-02,
        5.82128113e-02, -4.65962898e-03, -3.46872516e-02, -5.54517203e-03,
        1.17575413e+00, -2.72886589e-02,  1.03489006e-01,  4.39751669e-02,
       -6.55655744e-02, -7.42239599e-02,  8.77174597e-02,  2.37097336e-01,
       -9.21515850e-01,  2.09262966e-02,  7.90783605e-02, -3.13505587e-02,
       -6.74572273e-02,  4.84382828e-02, -5.51371084e-02, -2.58541429e-01,
       -8.99034170e-01, -1.57869617e-01,  3.73065417e-02,  8.46264203e-02,
        7.20498433e-02,  9.39294415e-02, -1.92571119e-02, -2.80099726e+00,
       -3.42781359e-15, -1.57318573e-01, -2.05647288e-02,  1.35531204e-01,
       -4.56698698e-02,  3.50931490e-02, -1.03113414e-02,  3.10862447e-15,
        1.33616091e-01,  1.10960304e-01, -1.73050733e-02, -6.61512753e-03,
        1.12607142e-01,  5.58271849e-02,  9.23827312e-03,  9.82819205e-02,
        6.02436261e-01,  2.73519707e-02, -5.74530483e-03, -6.65091904e-02,
       -2.06505662e-01, -4.09517963e-02,  1.04008450e-01, -1.47742618e-01,
       -1.24573828e+00, -1.38545269e-01,  8.29622004e-03, -6.17605438e-02,
       -6.54155098e-03, -7.92150871e-02, -2.91475790e-03, -5.47127161e-02])
[6]:
# Estimated intercept (independent term) of the fitted model
estimator.intercept_
[6]:
3.405961510450279