# OrdinalEncoder
[1]:
import pandas as pd
# Toy dataset: three records, each described by three categorical (string)
# features.
X = [
    ["male", "from US", "uses Safari"],
    ["female", "from Europe", "uses Firefox"],
    ["non_binary", "from America", "uses Chrome"],
]

# Wrap the raw rows in a DataFrame with named columns so they render as a
# table; the bare `df` on the last line is what the notebook displays.
df = pd.DataFrame(data=X, columns=["sex", "from", "uses"])
df
[1]:
| | sex | from | uses |
|---|---|---|---|
| 0 | male | from US | uses Safari |
| 1 | female | from Europe | uses Firefox |
| 2 | non_binary | from America | uses Chrome |
[2]:
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
# Build the encoder with its constructor options spelled out for reference.
ordinalEncoder = OrdinalEncoder(
    # categories -- where each column's set of unique values comes from:
    #   "auto" : learned from the data passed to fit.
    #   list   : categories[i] lists the values expected in column i.
    categories="auto",
    # dtype -- numeric type of the encoded output array.
    dtype=np.float64,
    # handle_unknown -- with "error", transform raises if it meets a category
    # that was not seen during fit. Left commented out here, so the library
    # default applies.
    # handle_unknown="error",
    # unknown_value -- required when handle_unknown="use_encoded_value"; it is
    # the code assigned to unknown categories. Left commented out as well.
    # unknown_value=None,
)

# Learn the categories from X and encode X in one call; the returned array is
# the cell's displayed output.
ordinalEncoder.fit_transform(X)
[2]:
array([[1., 2., 2.],
       [0., 1., 1.],
       [2., 0., 0.]])
[3]:
# Categories learned during fit: one array per input column, in the same
# column order as X. A value's position within its array is the code that
# transform() emitted for it in the encoded output shown earlier.
ordinalEncoder.categories_
[3]:
[array(['female', 'male', 'non_binary'], dtype=object),
 array(['from America', 'from Europe', 'from US'], dtype=object),
 array(['uses Chrome', 'uses Firefox', 'uses Safari'], dtype=object)]