Storage format#

Última modificación: Mayo 14, 2022

https://www.mlflow.org/docs/latest/models.html

Código base#

[1]:

def load_data(url=None):
    """Load a wine-quality CSV and separate the features from the target.

    Parameters
    ----------
    url : str, path or file-like object, optional
        Source passed to ``pandas.read_csv``. When omitted, the UCI
        ``winequality-red.csv`` file is downloaded, which is what the
        function always did before this argument existed. Passing a local
        path or buffer allows running without network access.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is a DataFrame holding every column except
        ``"quality"`` and ``y`` is the ``"quality"`` column as a Series.
    """
    import pandas as pd

    if url is None:
        url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

    # The file is semicolon-separated, not comma-separated.
    df = pd.read_csv(url, sep=";")

    y = df["quality"]
    # drop() returns a new frame, so ``df`` itself is left untouched.
    x = df.drop(columns="quality")

    return x, y

def make_train_test_split(x, y):
    """Partition features and target into train and test subsets.

    A quarter of the rows is held out for testing and ``random_state`` is
    fixed, so every caller works with the same partition.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    split_options = {"test_size": 0.25, "random_state": 123456}
    parts = train_test_split(x, y, **split_options)

    # train_test_split hands back a list; callers expect a tuple.
    return tuple(parts)

def eval_metrics(y_true, y_pred):
    """Score predictions against the true target values.

    Returns
    -------
    tuple
        ``(mse, mae, r2)``: mean squared error, mean absolute error and
        the R2 score, in that order.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # Order matters: callers unpack the result positionally.
    scorers = (mean_squared_error, mean_absolute_error, r2_score)
    return tuple(score(y_true, y_pred) for score in scorers)

def report(estimator, mse, mae, r2):
    """Print the estimator followed by its three metrics, one per line.

    The first line is ``str(estimator)`` immediately followed by a colon;
    each metric line is indented by two spaces.
    """
    lines = [
        f"{estimator!s}:",
        f"  MSE: {mse}",
        f"  MAE: {mae}",
        f"  R2: {r2}",
    ]
    print("\n".join(lines))

def train_estimator(alpha=0.5, l1_ratio=0.5, verbose=1, model_path="/tmp/my_model"):
    """Train an ElasticNet, track the run with MLflow and save the model to disk.

    Parameters
    ----------
    alpha : float
        ``alpha`` passed to ``ElasticNet``.
    l1_ratio : float
        ``l1_ratio`` passed to ``ElasticNet``.
    verbose : int
        When greater than zero, the metrics are printed with ``report``.
    model_path : str
        Directory where the model is written with
        ``mlflow.sklearn.save_model``. Defaults to ``"/tmp/my_model"``,
        the location used before this argument existed.
        WARNING: anything already stored at this path is deleted first.

    Returns
    -------
    None
        The function works through side effects: it prints the tracking
        URI, logs parameters, metrics and the model to the active MLflow
        run, and writes the model under ``model_path``.
    """
    import shutil

    import mlflow
    import mlflow.sklearn
    from sklearn.linear_model import ElasticNet

    x, y = load_data()
    x_train, x_test, y_train, y_test = make_train_test_split(x, y)

    print('Tracking directory:', mlflow.get_tracking_uri())

    with mlflow.start_run():

        estimator = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=12345)
        estimator.fit(x_train, y_train)
        mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
        if verbose > 0:
            report(estimator, mse, mae, r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)

        mlflow.log_metric("mse", mse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        # Stores the model as an artifact of the current run.
        mlflow.sklearn.log_model(estimator, "model")

        # -------------------------------------------------------------------------------
        #
        # Save a standalone copy of the model so it can be loaded or served
        # later. save_model creates the target directory but refuses to write
        # into one that already holds files, so a copy left by a previous call
        # is removed first; otherwise re-running this function would fail.
        #
        shutil.rmtree(model_path, ignore_errors=True)
        mlflow.sklearn.save_model(estimator, model_path)

Almacenamiento del modelo de prueba#

[2]:

!ls /tmp

/tmp

[3]:

!rm -rf mlruns
train_estimator(0.1, 0.05)

Tracking directory: file:///Volumes/GitHub/courses-source/notebooks/mlflow/mlruns
ElasticNet(alpha=0.1, l1_ratio=0.05, random_state=12345):
  MSE: 0.48683363717622585
  MAE: 0.5493759222336462
  R2: 0.30150487868829456

/Volumes/GitHub/courses-source/notebooks/mlflow/.venv/lib/python3.8/site-packages/setuptools/distutils_patch.py:25: UserWarning: Distutils was imported before Setuptools. This usage is discouraged and may exhibit undesirable behaviors or errors. Please use Setuptools' objects directly or at least import Setuptools first.
  warnings.warn(

[4]:

!ls /tmp

/tmp

[5]:

!ls -1 /tmp/my_model/

MLmodel
conda.yaml
model.pkl
python_env.yaml
requirements.txt

Contenido de los archivos#

[6]:

#
# Especificación del modelo
#
!cat /tmp/my_model/MLmodel

flavors:
  python_function:
    env: conda.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    python_version: 3.8.9
  sklearn:
    code: null
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 1.1.1
mlflow_version: 1.26.1
model_uuid: b5676e4da4b5402680a0b8df5a8e76ee
utc_time_created: '2022-06-04 03:45:32.343915'

[7]:

#
# Especificación del ambiente
#
!cat /tmp/my_model/conda.yaml

channels:
- conda-forge
dependencies:
- python=3.8.9
- pip<=22.1.2
- pip:
  - mlflow
  - cloudpickle==2.1.0
  - psutil==5.9.1
  - scikit-learn==1.1.1
name: mlflow-env

[8]:

#
# Especificación de requerimientos
#
!cat /tmp/my_model/requirements.txt

mlflow
cloudpickle==2.1.0
psutil==5.9.1
scikit-learn==1.1.1

Uso de un modelo almacenado cargando directamente el modelo#

[9]:

def check_estimator(estimator_path="/tmp/my_model"):
    """Load a saved model from disk and print its metrics on the test split.

    Parameters
    ----------
    estimator_path : str
        Directory of a model stored in MLflow format. Defaults to
        ``"/tmp/my_model"``, the folder this function always read from
        before the argument existed.

    Returns
    -------
    None
        The metrics are printed with ``report``.
    """
    # Import the submodule explicitly rather than relying on ``import mlflow``
    # to expose ``mlflow.pyfunc`` as a side effect.
    import mlflow.pyfunc

    x, y = load_data()
    # Only the test partition is needed to evaluate an already-trained model.
    _, x_test, _, y_test = make_train_test_split(x, y)

    # The model is loaded through the generic pyfunc flavor.
    estimator = mlflow.pyfunc.load_model(estimator_path)
    mse, mae, r2 = eval_metrics(y_test, y_pred=estimator.predict(x_test))
    report(estimator, mse, mae, r2)


#
# The output should match the model obtained in the previous cells
#
check_estimator()

mlflow.pyfunc.loaded_model:
  flavor: mlflow.sklearn
:
  MSE: 0.48683363717622585
  MAE: 0.5493759222336462
  R2: 0.30150487868829456

Uso del modelo con models serve#

[10]:

def get_json_test_data():
    """Return the first ten test rows as a shell-quoted JSON string.

    The rows are serialized with pandas' ``split`` orientation and then
    quoted so the result can be pasted as a single argument of a shell
    command.

    Returns
    -------
    str
        The JSON payload wrapped in shell quoting.
    """
    import shlex

    x, y = load_data()
    # Only the test features are needed to build the request payload.
    _, x_test, _, _ = make_train_test_split(x, y)

    payload = x_test.iloc[0:10, :].to_json(orient='split')

    # shlex.quote instead of repr(): repr() emits Python escapes, which a
    # shell does not understand, so a payload containing a single quote or
    # a backslash would break the command. For payloads without those
    # characters both produce the same single-quoted string.
    return shlex.quote(payload)

# Build the quoted JSON payload once; the bare name on the next line makes the
# notebook display it as the cell output.
data = get_json_test_data()
data

[10]:

'\'{"columns":["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"],"index":[1105,553,1339,1591,1552,1479,371,631,1559,741],"data":[[6.3,0.57,0.28,2.1,0.048,13.0,49.0,0.99374,3.41,0.6,12.8],[5.0,1.04,0.24,1.6,0.05,32.0,96.0,0.9934,3.74,0.62,11.5],[7.5,0.51,0.02,1.7,0.084,13.0,31.0,0.99538,3.36,0.54,10.5],[5.4,0.74,0.09,1.7,0.089,16.0,26.0,0.99402,3.67,0.56,11.6],[6.3,0.68,0.01,3.7,0.103,32.0,54.0,0.99586,3.51,0.66,11.3],[8.2,0.28,0.6,3.0,0.104,10.0,22.0,0.99828,3.39,0.68,10.6],[7.9,0.24,0.4,1.6,0.056,11.0,25.0,0.9967,3.32,0.87,8.7],[10.4,0.28,0.54,2.7,0.105,5.0,19.0,0.9988,3.25,0.63,9.5],[7.8,0.6,0.26,2.0,0.08,31.0,131.0,0.99622,3.21,0.52,9.9],[9.2,0.53,0.24,2.6,0.078,28.0,139.0,0.99788,3.21,0.57,9.5]]}\''

Ejecutar en la consola:

mlflow models serve --no-conda -m /tmp/my_model

assets/mlflow-model-1-serve.png

[11]:

!curl http://127.0.0.1:5000/invocations -H 'Content-Type: application/json' -d {data}