Normalizer#

  • Este transformador se aplica comúnmente en procesamiento de texto y clustering.

  • Normaliza cada muestra individualmente para que su norma sea unitaria.

  • La norma puede ser: ‘l2’, ‘l1’, ‘max’.

[1]:
import numpy as np

# tf: term-frequency matrix; each row is one sample, each column one feature.
tf = np.array(
    [
        (3, 0, 1),
        (2, 0, 0),
        (3, 0, 0),
        (4, 0, 0),
        (3, 2, 0),
        (3, 0, 2),
    ]
)
[2]:
from sklearn.preprocessing import Normalizer

# -----------------------------------------------------------------------------
# `norm` selects the norm used to rescale each non-zero sample (row):
# one of 'l1', 'l2' or 'max'. With norm='max', values are rescaled by the
# maximum of the absolute values of the sample.
# -----------------------------------------------------------------------------
normalizer_l2_norm = Normalizer(norm="l2")

# fit() returns the estimator itself, so fit and transform can be chained.
tf_l2_norm = normalizer_l2_norm.fit(tf).transform(tf)
tf_l2_norm
[2]:
array([[0.9486833 , 0.        , 0.31622777],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.83205029, 0.5547002 , 0.        ],
       [0.83205029, 0.        , 0.5547002 ]])
[3]:
# Verification: the squared entries of every row must add up to 1, which is
# equivalent to each row having unit L2 norm.
np.sum(np.square(tf_l2_norm), axis=1)
[3]:
array([1., 1., 1., 1., 1., 1.])
[4]:
# With norm="max" each row is divided by the largest absolute value it holds.
normalizer_max_norm = Normalizer(norm="max")

# fit() returns the estimator itself, so fit and transform can be chained.
tf_max_norm = normalizer_max_norm.fit(tf).transform(tf)
tf_max_norm
[4]:
array([[1.        , 0.        , 0.33333333],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.66666667, 0.        ],
       [1.        , 0.        , 0.66666667]])