QuantileTransformer#

  • Este método transforma las características para que sigan una distribución normal o uniforme.

  • Tiende a dispersar los valores más frecuentes.

  • Reduce el impacto de los outliers.

  • Se aplica a cada columna del dataset de forma independiente.

  • Cada columna es transformada usando la siguiente fórmula:

    G^{-1}(F(X))

    donde F es la función de distribución acumulada de la característica y G^{-1} es la función cuantil de la distribución deseada G.

[1]:
import seaborn as sns

# Load the penguins dataset, keep only the flipper-length column, and expose
# it under the name "original".
penguins = sns.load_dataset("penguins")
data = penguins[["flipper_length_mm"]].rename(
    columns={"flipper_length_mm": "original"}
)
[2]:
from sklearn.preprocessing import QuantileTransformer

# Quantile transformer whose target output distribution is uniform.
quantileTransformer_uniform = QuantileTransformer(
    n_quantiles=100,  # number of quantiles to be computed
    output_distribution="uniform",  # either "uniform" or "normal"
    # Maximum number of samples used to estimate the quantiles, kept bounded
    # for computational efficiency.
    subsample=100000,
    # Controls the random number generation used for subsampling and
    # smoothing noise.
    random_state=None,
)

# Fit on the "original" column and transform that same column in one call.
data["transformed_uniform"] = quantileTransformer_uniform.fit_transform(
    data[["original"]]
)
[3]:
# Quantile transformer whose target output distribution is normal.
quantileTransformer_normal = QuantileTransformer(
    output_distribution="normal",
    n_quantiles=100,
)

# Fit on the "original" column and transform that same column in one call;
# the estimator stays fitted for later inspection of its attributes.
data["transformed_normal"] = quantileTransformer_normal.fit_transform(
    data[["original"]]
)
[4]:
# Joint plot of the original values against their uniform-transformed values.
# `height` sets the side of the (square) figure at creation time, so the grid
# layout is computed for the final 3x3-inch size instead of being built at the
# default size and shrunk afterwards.
g = sns.jointplot(
    data=data,
    x="original",
    y="transformed_uniform",
    kind="scatter",
    height=3,
)
../_images/19_preprocesamiento_de_datos_05_QuantileTransformer_9_0.png
[5]:
# Joint plot of the original values against their normal-transformed values.
# `height` sets the side of the (square) figure at creation time, so the grid
# layout is computed for the final 3x3-inch size instead of being built at the
# default size and shrunk afterwards.
g = sns.jointplot(
    data=data,
    x="original",
    y="transformed_normal",
    kind="scatter",
    height=3,
)
../_images/19_preprocesamiento_de_datos_05_QuantileTransformer_10_0.png
[6]:
# Number of quantiles reported by the fitted transformer; the recorded output
# is 100, matching the n_quantiles=100 it was constructed with.
quantileTransformer_normal.n_quantiles_
[6]:
100
[7]:
# Quantile values learned from the "original" column during fit; the recorded
# output is a 100x1 array rising from 172 to 231.
quantileTransformer_normal.quantiles_
[7]:
array([[172.        ],
       [178.        ],
       [178.88888889],
       [180.        ],
       [181.        ],
       [181.        ],
       [182.        ],
       [183.11111111],
       [184.        ],
       [184.        ],
       [185.        ],
       [185.        ],
       [186.        ],
       [186.        ],
       [187.        ],
       [187.        ],
       [187.        ],
       [187.        ],
       [187.        ],
       [188.        ],
       [188.        ],
       [189.        ],
       [189.        ],
       [190.        ],
       [190.        ],
       [190.        ],
       [190.        ],
       [190.        ],
       [190.        ],
       [191.        ],
       [191.        ],
       [191.        ],
       [191.        ],
       [192.        ],
       [192.        ],
       [193.        ],
       [193.        ],
       [193.        ],
       [193.        ],
       [194.        ],
       [194.        ],
       [195.        ],
       [195.        ],
       [195.        ],
       [195.        ],
       [195.        ],
       [196.        ],
       [196.        ],
       [196.33333333],
       [197.        ],
       [197.        ],
       [197.66666667],
       [198.        ],
       [198.        ],
       [199.        ],
       [199.44444444],
       [200.        ],
       [201.        ],
       [201.77777778],
       [202.22222222],
       [203.        ],
       [205.        ],
       [207.        ],
       [208.        ],
       [208.        ],
       [209.        ],
       [209.33333333],
       [210.        ],
       [210.        ],
       [210.        ],
       [210.11111111],
       [212.        ],
       [212.        ],
       [213.        ],
       [213.        ],
       [214.        ],
       [214.        ],
       [215.        ],
       [215.        ],
       [215.        ],
       [216.        ],
       [216.        ],
       [216.44444444],
       [217.        ],
       [218.        ],
       [218.        ],
       [219.        ],
       [220.        ],
       [220.        ],
       [220.55555556],
       [221.        ],
       [222.        ],
       [222.        ],
       [224.        ],
       [225.        ],
       [226.44444444],
       [228.        ],
       [230.        ],
       [230.        ],
       [231.        ]])
[8]:
# Reference quantile levels of the fitted transformer; the recorded output is
# 100 evenly spaced values from 0 to 1.
quantileTransformer_normal.references_
[8]:
array([0.        , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
       0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909,
       0.1010101 , 0.11111111, 0.12121212, 0.13131313, 0.14141414,
       0.15151515, 0.16161616, 0.17171717, 0.18181818, 0.19191919,
       0.2020202 , 0.21212121, 0.22222222, 0.23232323, 0.24242424,
       0.25252525, 0.26262626, 0.27272727, 0.28282828, 0.29292929,
       0.3030303 , 0.31313131, 0.32323232, 0.33333333, 0.34343434,
       0.35353535, 0.36363636, 0.37373737, 0.38383838, 0.39393939,
       0.4040404 , 0.41414141, 0.42424242, 0.43434343, 0.44444444,
       0.45454545, 0.46464646, 0.47474747, 0.48484848, 0.49494949,
       0.50505051, 0.51515152, 0.52525253, 0.53535354, 0.54545455,
       0.55555556, 0.56565657, 0.57575758, 0.58585859, 0.5959596 ,
       0.60606061, 0.61616162, 0.62626263, 0.63636364, 0.64646465,
       0.65656566, 0.66666667, 0.67676768, 0.68686869, 0.6969697 ,
       0.70707071, 0.71717172, 0.72727273, 0.73737374, 0.74747475,
       0.75757576, 0.76767677, 0.77777778, 0.78787879, 0.7979798 ,
       0.80808081, 0.81818182, 0.82828283, 0.83838384, 0.84848485,
       0.85858586, 0.86868687, 0.87878788, 0.88888889, 0.8989899 ,
       0.90909091, 0.91919192, 0.92929293, 0.93939394, 0.94949495,
       0.95959596, 0.96969697, 0.97979798, 0.98989899, 1.        ])