QuantileTransformer#
Este método transforma las características para que sigan una distribución normal o uniforme.
Tiende a dispersar los valores más frecuentes.
Reduce el impacto de los outliers.
Se aplica a cada columna del dataset de forma independiente.
Cada columna es transformada usando la siguiente fórmula:
$$G^{-1}(F(X))$$
donde $F$ es la función de distribución acumulada de la característica y $G^{-1}$ es la función cuantil (la inversa de la distribución acumulada) de la distribución de salida deseada $G$.
[1]:
import seaborn as sns
# Load the penguins example dataset and keep only the flipper length,
# exposed under the generic column name "original".
penguins = sns.load_dataset("penguins")
data = penguins[["flipper_length_mm"]].rename(
    columns={"flipper_length_mm": "original"}
)
[2]:
from sklearn.preprocessing import QuantileTransformer
# Quantile transformer whose output follows a uniform distribution.
quantileTransformer_uniform = QuantileTransformer(
    # Target distribution of the transformed data: "uniform" or "normal".
    output_distribution="uniform",
    # Number of quantiles to be computed.
    n_quantiles=100,
    # Maximum number of samples used to estimate the quantiles
    # (kept bounded for computational efficiency).
    subsample=100000,
    # Determines random number generation for subsampling and smoothing noise.
    random_state=None,
)
# Learn the quantiles from the "original" column and transform that same
# column in a single call; the result is stored next to the source values.
data["transformed_uniform"] = quantileTransformer_uniform.fit_transform(
    data[["original"]]
)
[3]:
# Quantile transformer whose output follows a normal distribution,
# estimated from 100 quantiles.
quantileTransformer_normal = QuantileTransformer(
    output_distribution="normal",
    n_quantiles=100,
)
# Learn the quantiles from the "original" column and transform that same
# column in a single call; the result is stored next to the source values.
data["transformed_normal"] = quantileTransformer_normal.fit_transform(
    data[["original"]]
)
[4]:
# Scatter plot of the original values against their uniform-transformed
# counterparts.
# `height=3` builds the (square) figure at 3x3 inches directly, so no
# resizing is needed after the grid has been created.
g = sns.jointplot(
    x="original",
    y="transformed_uniform",
    data=data,
    kind="scatter",
    height=3,
)
[5]:
# Scatter plot of the original values against their normal-transformed
# counterparts.
# `height=3` builds the (square) figure at 3x3 inches directly, so no
# resizing is needed after the grid has been created.
g = sns.jointplot(
    x="original",
    y="transformed_normal",
    data=data,
    kind="scatter",
    height=3,
)
[6]:
# Number of quantiles the fitted transformer actually used
# (100 requested when it was constructed).
quantileTransformer_normal.n_quantiles_
[6]:
100
[7]:
# Values of the "original" feature at each reference quantile, learned
# during fit: one row per quantile, one column per fitted feature.
quantileTransformer_normal.quantiles_
[7]:
array([[172. ],
[178. ],
[178.88888889],
[180. ],
[181. ],
[181. ],
[182. ],
[183.11111111],
[184. ],
[184. ],
[185. ],
[185. ],
[186. ],
[186. ],
[187. ],
[187. ],
[187. ],
[187. ],
[187. ],
[188. ],
[188. ],
[189. ],
[189. ],
[190. ],
[190. ],
[190. ],
[190. ],
[190. ],
[190. ],
[191. ],
[191. ],
[191. ],
[191. ],
[192. ],
[192. ],
[193. ],
[193. ],
[193. ],
[193. ],
[194. ],
[194. ],
[195. ],
[195. ],
[195. ],
[195. ],
[195. ],
[196. ],
[196. ],
[196.33333333],
[197. ],
[197. ],
[197.66666667],
[198. ],
[198. ],
[199. ],
[199.44444444],
[200. ],
[201. ],
[201.77777778],
[202.22222222],
[203. ],
[205. ],
[207. ],
[208. ],
[208. ],
[209. ],
[209.33333333],
[210. ],
[210. ],
[210. ],
[210.11111111],
[212. ],
[212. ],
[213. ],
[213. ],
[214. ],
[214. ],
[215. ],
[215. ],
[215. ],
[216. ],
[216. ],
[216.44444444],
[217. ],
[218. ],
[218. ],
[219. ],
[220. ],
[220. ],
[220.55555556],
[221. ],
[222. ],
[222. ],
[224. ],
[225. ],
[226.44444444],
[228. ],
[230. ],
[230. ],
[231. ]])
[8]:
# Reference quantile levels (cumulative probabilities from 0 to 1) that the
# rows of `quantiles_` correspond to.
quantileTransformer_normal.references_
[8]:
array([0. , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909,
0.1010101 , 0.11111111, 0.12121212, 0.13131313, 0.14141414,
0.15151515, 0.16161616, 0.17171717, 0.18181818, 0.19191919,
0.2020202 , 0.21212121, 0.22222222, 0.23232323, 0.24242424,
0.25252525, 0.26262626, 0.27272727, 0.28282828, 0.29292929,
0.3030303 , 0.31313131, 0.32323232, 0.33333333, 0.34343434,
0.35353535, 0.36363636, 0.37373737, 0.38383838, 0.39393939,
0.4040404 , 0.41414141, 0.42424242, 0.43434343, 0.44444444,
0.45454545, 0.46464646, 0.47474747, 0.48484848, 0.49494949,
0.50505051, 0.51515152, 0.52525253, 0.53535354, 0.54545455,
0.55555556, 0.56565657, 0.57575758, 0.58585859, 0.5959596 ,
0.60606061, 0.61616162, 0.62626263, 0.63636364, 0.64646465,
0.65656566, 0.66666667, 0.67676768, 0.68686869, 0.6969697 ,
0.70707071, 0.71717172, 0.72727273, 0.73737374, 0.74747475,
0.75757576, 0.76767677, 0.77777778, 0.78787879, 0.7979798 ,
0.80808081, 0.81818182, 0.82828283, 0.83838384, 0.84848485,
0.85858586, 0.86868687, 0.87878788, 0.88888889, 0.8989899 ,
0.90909091, 0.91919192, 0.92929293, 0.93939394, 0.94949495,
0.95959596, 0.96969697, 0.97979798, 0.98989899, 1. ])