Normalizer
This transformer is commonly used in text processing and clustering.
It normalizes each sample individually so that its norm equals one.
The norm can be 'l2', 'l1', or 'max'.
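To make the idea concrete, here is a minimal sketch (added for illustration, not part of the original notebook): with norm='l2', each sample is divided by the square root of the sum of its squared entries, so the transformed sample has Euclidean length 1. For the vector [3, 0, 1], which is also the first row of the data used below:

import numpy as np

x = np.array([3.0, 0.0, 1.0])

# Divide by the Euclidean (L2) norm: sqrt(3**2 + 0**2 + 1**2) = sqrt(10).
x_unit = x / np.linalg.norm(x, ord=2)

x_unit                   # approx. [0.94868, 0.     , 0.31623]
np.square(x_unit).sum()  # 1.0 (unit L2 norm)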
[1]:
import numpy as np
#
# tf: term-frequency
#
tf = np.array(
    [
        [3, 0, 1],
        [2, 0, 0],
        [3, 0, 0],
        [4, 0, 0],
        [3, 2, 0],
        [3, 0, 2],
    ]
)
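For reference, the per-row scaling factors that Normalizer divides by under each of the three norm options can be computed directly. This is a small sketch added for illustration (not part of the original notebook), and it assumes the tf array and numpy import from the cell above:

np.linalg.norm(tf, ord=2, axis=1)  # L2 norms:  approx. [3.162, 2., 3., 4., 3.606, 3.606]
np.abs(tf).sum(axis=1)             # L1 norms:  [4, 2, 3, 4, 5, 5]
np.abs(tf).max(axis=1)             # max norms: [3, 2, 3, 4, 3, 3]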
[2]:
from sklearn.preprocessing import Normalizer
normalizer_l2_norm = Normalizer(
    # -------------------------------------------------------------------------
    # The norm to use to normalize each non zero sample. If norm='max' is used,
    # values will be rescaled by the maximum of the absolute values.
    # 'l1', 'l2', 'max'
    norm="l2",
)
normalizer_l2_norm.fit(tf)
tf_l2_norm = normalizer_l2_norm.transform(tf)
tf_l2_norm
[2]:
array([[0.9486833 , 0.        , 0.31622777],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.83205029, 0.5547002 , 0.        ],
       [0.83205029, 0.        , 0.5547002 ]])
[3]:
#
# Verification
#
np.square(tf_l2_norm).sum(axis=1)
[3]:
array([1., 1., 1., 1., 1., 1.])
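The 'l1' option mentioned above is not demonstrated in the original notebook; the following sketch (an addition, assuming the tf array defined earlier) shows it in the same style. Each row is divided by the sum of its absolute values, so every transformed row sums to 1:

normalizer_l1_norm = Normalizer(
    norm="l1",
)
normalizer_l1_norm.fit(tf)
tf_l1_norm = normalizer_l1_norm.transform(tf)
tf_l1_norm
# Expected output (approximately):
# array([[0.75, 0.  , 0.25],
#        [1.  , 0.  , 0.  ],
#        [1.  , 0.  , 0.  ],
#        [1.  , 0.  , 0.  ],
#        [0.6 , 0.4 , 0.  ],
#        [0.6 , 0.  , 0.4 ]])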
[4]:
normalizer_max_norm = Normalizer(
norm="max",
)
normalizer_max_norm.fit(tf)
tf_max_norm = normalizer_max_norm.transform(tf)
tf_max_norm
[4]:
array([[1.        , 0.        , 0.33333333],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.66666667, 0.        ],
       [1.        , 0.        , 0.66666667]])
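As a sanity check analogous to the L2 verification above, a small added sketch (not part of the original notebook): with norm='max', the maximum absolute value of each transformed row should be 1.

#
# Verification
#
np.abs(tf_max_norm).max(axis=1)
# Expected: array([1., 1., 1., 1., 1., 1.])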