# OrdinalEncoder
[1]:
import pandas as pd
# Toy dataset: one row per person, three categorical (string) features each.
X = [
    ["male", "from US", "uses Safari"],
    ["female", "from Europe", "uses Firefox"],
    ["non_binary", "from America", "uses Chrome"],
]

# Same rows as a labelled DataFrame, so the raw values are easy to inspect.
df = pd.DataFrame(
    data=X,
    columns=[
        "sex",
        "from",
        "uses",
    ],
)
df
[1]:
| | sex | from | uses |
---|---|---|---|
0 | male | from US | uses Safari |
1 | female | from Europe | uses Firefox |
2 | non_binary | from America | uses Chrome |
[2]:
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
# Build the encoder with its main options spelled out. Options that are left
# commented out are not passed, so the library's own defaults apply to them.
ordinalEncoder = OrdinalEncoder(
    # categories -- the unique values expected for each feature:
    #   "auto" : determine the categories automatically from the training data
    #   list   : categories[i] holds the categories expected in the i-th column
    categories="auto",
    # dtype -- desired dtype of the encoded output.
    dtype=np.float64,
    # handle_unknown -- when set to "error", an error is raised if an unknown
    # categorical feature is present during transform.
    # handle_unknown="error",
    # unknown_value -- required when handle_unknown is "use_encoded_value";
    # it sets the encoded value assigned to unknown categories.
    # unknown_value=None,
)

# Learn the categories from X and encode X in one step; the encoded array is
# the cell's displayed result.
ordinalEncoder.fit_transform(X)
[2]:
array([[1., 2., 2.],
[0., 1., 1.],
[2., 0., 0.]])
[3]:
# Categories learned during fit: one array per input column. Comparing with
# the encoded output above, a value's position in its array is its code
# (e.g. 'female' -> 0, 'male' -> 1, 'non_binary' -> 2).
ordinalEncoder.categories_
[3]:
[array(['female', 'male', 'non_binary'], dtype=object),
array(['from America', 'from Europe', 'from US'], dtype=object),
array(['uses Chrome', 'uses Firefox', 'uses Safari'], dtype=object)]