# OrdinalEncoder

[1]:
import pandas as pd

# Toy dataset: every row is one sample made of three categorical string values.
# Kept as a plain list of lists so it can be handed to an encoder directly.
X = [
    ["male", "from US", "uses Safari"],
    ["female", "from Europe", "uses Firefox"],
    ["non_binary", "from America", "uses Chrome"],
]

# The same rows wrapped in a labelled DataFrame, used here for a readable
# tabular display of the raw data.
df = pd.DataFrame(
    data=X,
    columns=["sex", "from", "uses"],
)
df
[1]:
sex from uses
0 male from US uses Safari
1 female from Europe uses Firefox
2 non_binary from America uses Chrome
[2]:
import numpy as np
from sklearn.preprocessing import OrdinalEncoder

# Build the encoder. Each keyword below is annotated with what it controls.
ordinalEncoder = OrdinalEncoder(
    # categories -- the unique values expected in each feature:
    #   "auto" : determine the categories automatically from the training data
    #   list   : categories[i] holds the categories expected in the ith column
    categories="auto",
    # dtype -- desired dtype of the encoded output.
    dtype=np.float64,
    # handle_unknown -- left commented out, so the library default applies.
    #   When set to "error", an error is raised if an unknown categorical
    #   feature is present during transform.
    # handle_unknown="error",
    # unknown_value -- left commented out, so the library default applies.
    #   Required when handle_unknown is set to "use_encoded_value"; it sets
    #   the encoded value given to unknown categories.
    # unknown_value=None,
)

# fit() returns the fitted encoder, so learning the categories and encoding the
# same rows can be chained; the encoded array is this cell's displayed output.
ordinalEncoder.fit(X).transform(X)
[2]:
array([[1., 2., 2.],
       [0., 1., 1.],
       [2., 0., 0.]])
[3]:
# Categories learned during fit: one array per input column. A value's position
# in its column's array is the integer code transform() emitted for it above.
ordinalEncoder.categories_
[3]:
[array(['female', 'male', 'non_binary'], dtype=object),
 array(['from America', 'from Europe', 'from US'], dtype=object),
 array(['uses Chrome', 'uses Firefox', 'uses Safari'], dtype=object)]