KBinsDiscretizer
[1]:
import numpy as np

# Toy dataset: 11 samples x 3 features, stored as float64.
# Feature 0 is simply the sample index 0..10; features 1 and 2 are
# small hand-picked integer-valued columns.
X = np.column_stack(
    [
        np.arange(11, dtype=float),
        np.array([4, 5, 6, 3, 7, 3, 7, 6, 3, 2, 1], dtype=float),
        np.array([12, 15, 14, 11, 11, 13, 11, 14, 18, 13, 14], dtype=float),
    ]
)
X
[1]:
array([[ 0., 4., 12.],
[ 1., 5., 15.],
[ 2., 6., 14.],
[ 3., 3., 11.],
[ 4., 7., 11.],
[ 5., 3., 13.],
[ 6., 7., 11.],
[ 7., 6., 14.],
[ 8., 3., 18.],
[ 9., 2., 13.],
[10., 1., 14.]])
[2]:
from sklearn.preprocessing import KBinsDiscretizer

# Bin each of the 3 features of X into discrete intervals:
# feature 0 gets 3 bins, features 1 and 2 get 2 bins each.
kBinsDiscretizer = KBinsDiscretizer(
    # -------------------------------------------------------------------------
    # Bins per feature. Any entry < 2 raises ValueError.
    n_bins=[3, 2, 2],
    # -------------------------------------------------------------------------
    # How the transformed result is encoded:
    # 'ordinal' yields the integer bin index per feature, while
    # 'onehot' / 'onehot-dense' would yield a one-hot encoding
    # (sparse and dense, respectively).
    encode="ordinal",
    # -------------------------------------------------------------------------
    # How bin edges are chosen:
    # 'quantile' places the same number of samples in each bin;
    # 'uniform' would give equal-width bins, and 'kmeans' would group
    # values by nearest 1D k-means cluster center.
    strategy="quantile",
    # -------------------------------------------------------------------------
    # Output dtype (np.float32 or np.float64); left at the default.
    # dtype=None,
)
# fit_transform(X) is equivalent to fit(X) followed by transform(X):
# it learns bin_edges_ / n_bins_ and returns the binned array.
kBinsDiscretizer.fit_transform(X)
[2]:
array([[0., 1., 0.],
[0., 1., 1.],
[0., 1., 1.],
[0., 0., 0.],
[1., 1., 0.],
[1., 0., 1.],
[1., 1., 0.],
[2., 1., 1.],
[2., 0., 1.],
[2., 0., 1.],
[2., 0., 1.]])
[3]:
# The learned bin edges, one array per feature; the per-feature arrays have
# different lengths (n_bins_[i] + 1 edges), hence the object-dtype container.
kBinsDiscretizer.bin_edges_
[3]:
array([array([ 0. , 3.33333333, 6.66666667, 10. ]),
array([1., 4., 7.]), array([11., 13., 18.])], dtype=object)
[4]:
# The number of bins actually used per feature — here identical to the
# n_bins=[3, 2, 2] passed to the constructor (see output below).
kBinsDiscretizer.n_bins_
[4]:
array([3, 2, 2])