# OrdinalEncoder

[1]:

import pandas as pd

# Toy data set: three samples (rows), each described by three categorical
# string features (columns).
X = [
    ["male", "from US", "uses Safari"],
    ["female", "from Europe", "uses Firefox"],
    ["non_binary", "from America", "uses Chrome"],
]

# Labelled, tabular view of the same rows; X itself stays a plain list of
# lists. The bare `df` on the last line renders the frame as the cell output.
df = pd.DataFrame(data=X, columns=["sex", "from", "uses"])
df

[1]:

	sex	from	uses
0	male	from US	uses Safari
1	female	from Europe	uses Firefox
2	non_binary	from America	uses Chrome

[2]:

import numpy as np
from sklearn.preprocessing import OrdinalEncoder

ordinalEncoder = OrdinalEncoder(
    # categories: the unique values expected for each feature.
    #   "auto" -> determine the categories automatically from the training data
    #   list   -> categories[i] holds the categories expected in the i-th column
    categories="auto",
    # dtype: desired element type of the encoded output.
    dtype=np.float64,
    # handle_unknown and unknown_value are not passed here, so the library
    # defaults apply:
    #   handle_unknown="error" -> an error is raised when an unknown category
    #                             is present during transform
    #   unknown_value=None     -> required only when
    #                             handle_unknown="use_encoded_value"; it then
    #                             sets the encoded value of unknown categories
)

# Learn the per-column categories from X and encode X in one step
# (equivalent to calling fit(X) followed by transform(X)).
ordinalEncoder.fit_transform(X)

[2]:

array([[1., 2., 2.],
       [0., 1., 1.],
       [2., 0., 0.]])

[3]:

# Inspect the categories found during fit: one array per input column, in
# column order. In the outputs shown in this notebook, a value's position in
# its array matches the code emitted by transform (e.g. 'female' -> 0.,
# 'male' -> 1., 'non_binary' -> 2. for the first column).
ordinalEncoder.categories_

[3]:

[array(['female', 'male', 'non_binary'], dtype=object),
 array(['from America', 'from Europe', 'from US'], dtype=object),
 array(['uses Chrome', 'uses Firefox', 'uses Safari'], dtype=object)]