Estilo#

Última modificación: Mayo 14, 2022

Los comentarios son más difíciles de mantener que el código#

[ ]:

#
# En esta función el código cambió pero los comentarios no.
#
def append_titles(self, input_df, extract_titles):
    """Return a copy of ``input_df``, optionally with a ``Title`` column.

    When ``extract_titles`` is truthy, the title is taken from the ``Name``
    column as the first capitalised word that is followed by a period.
    ``Mlle`` and ``Ms`` are mapped to ``Miss``, ``Mme`` to ``Mrs``, and a
    fixed list of rare titles is collapsed into ``"Special"``. Any other
    title is kept unchanged. The input frame is never modified.

    Raises:
        AttributeError: if a name contains no ``" Word."`` pattern, because
            ``re.search`` then returns ``None``.
    """
    df = input_df.copy()
    if extract_titles:
        # NOTE: the next two comment lines are the deliberately stale comment
        # this example illustrates; they no longer describe the code below.
        # Extract title Mrs and Mr from the title,
        # and set everything else to special
        df["Title"] = df.Name.apply(
            lambda x: re.search(r" ([A-Z][a-z]+)\.", x).group(1)
        )
        df["Title"] = df["Title"].replace({"Mlle": "Miss", "Mme": "Mrs", "Ms": "Miss"})
        df["Title"] = df["Title"].replace(
            [
                "Don",
                "Dona",
                "Rev",
                "Dr",
                "Major",
                "Lady",
                "Sir",
                "Col",
                "Capt",
                "Countess",
                "Jonkheer",
            ],
            "Special",
        )

    # The return belongs to the function body: at column 0 it is a
    # "'return' outside function" syntax error.
    return df

No esconda mal código detrás de comentarios#

El código explica qué se hace.
Los comentarios deben explicar por qué se hace.

El código debe ser auto-explicativo#

[ ]:

# Before: the condition is opaque, so a comment is needed to explain it.
# Check to see if the model finished training because of convergence
if model.train_epoch != MAX_EPOCH and model.last_loss_difference <= MIN_LOSS_DIFF:
    pass


# After: the check sits behind a descriptively named method, so the call reads
# on its own (training_converged is not defined in this excerpt; presumably it
# wraps the condition above -- confirm).
if model.training_converged(MAX_EPOCH, MIN_LOSS_DIFF):
    pass

[ ]:

# Trains the model in a streaming fashion.
# NOTE(review): the function name does not mention streaming, so the comment
# above is carrying information the name could carry instead.
def train(input_data, batch_size=100):
    pass


# The name states that the data is streamed; no explanatory comment is needed.
def train_from_streaming_data(input_data, batch_size=100):
    pass

[ ]:

def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """
    This function makes a copy of the input dataframe.
    If the cut_points arg is true then the age column is bucketized.
    If the quantiles arg is true then the age column is quantized.
    The new column is assigned and the new dataframe is returned
    """
    df = input_df.copy()
    if use_cut_points and not use_quantiles:
        cut_points = [-1, 0, 5, 12, 18, 35, 60, 100]
        # Eight cut points define seven intervals, so pd.cut needs exactly
        # seven labels. Without the comma after "Child", Python joins the
        # adjacent literals into "ChildTeenager" and pd.cut raises ValueError.
        label_names = [
            "Missing",
            "Infant",
            "Child",
            "Teenager",
            "Young Adult",
            "Adult",
            "Senior",
        ]
        df["Age_categories"] = pd.cut(df["Age"], cut_points, labels=label_names)
    elif use_quantiles and not use_cut_points:
        # labels=False makes qcut return integer quartile codes, not intervals.
        df["Age_categories"] = pd.qcut(df.Age, q=4, labels=False)
    # With both flags set, or neither, the untouched copy is returned.
    return df


def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """Return a copy of ``input_df``, optionally with age categories added.

    Exactly one flag must be truthy for a transformation to run:
    ``use_cut_points`` alone delegates to ``self.bucketize_age`` and
    ``use_quantiles`` alone delegates to ``self.quantize_age``. When both
    flags or neither flag is truthy, the plain copy is returned.
    """
    df = input_df.copy()
    if use_cut_points and not use_quantiles:
        df = self.bucketize_age(df)
    elif use_quantiles and not use_cut_points:
        df = self.quantize_age(df)
    return df

Comentarios inútiles#

[ ]:

def load_features_from_file():
    """Read the CSV at ``self.features_filepath``; raise on any failure.

    The resulting DataFrame is only bound to a local name and is not
    returned.

    NOTE(review): ``self`` is not a parameter of this function, so unless a
    global named ``self`` exists the lookup raises ``NameError``, which the
    bare ``except`` below turns into a message-less ``Exception``.

    Raises:
        Exception: whenever anything inside the ``try`` block fails.
    """
    try:
        df = pandas.read_csv(self.features_filepath)
    except:
        # No features_filepath means remote not initialized
        # The raise must sit inside the except body: dedented, the handler is
        # empty and the block does not parse.
        raise Exception()

[ ]:

def upload_new_model_to_repository(self, input_model, repository, timeout):
    # creates a new model location for the model
    # if the repository is unable to create a new model location
    # raise an exception
    # Otherwise attempt to upload, and if that does not fail raise another
    # exception
    #
    # NOTE(review): the comment block above only narrates the code, and its
    # last sentence disagrees with it -- the second exception is raised when
    # something in the try block fails, not when the upload succeeds.
    # NOTE(review): `timeout` is accepted but never used in this body.
    try:
        repository.create_new_model_location(input_model.uuid)
        if repository.is_model_location_set(input_model.uuid):
            self.upload_model_to_repository(input_model, repository)
        else:
            raise Exception("Repository not available")
    except:
        # NOTE(review): this bare except also catches the "Repository not
        # available" exception raised just above, so callers always see the
        # message below instead.
        raise Exception("Unable to upload model to repository")

Use PEP-8#

El código en el material de los cursos fue formateado con:

isort
black

Organización del código en el módulo#

Se recomienda empezar por lo más general y colocar las funciones en el orden en que se llaman.

[ ]:

def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """Return a copy of ``input_df``, optionally with age categories added.

    Exactly one flag must be truthy for a transformation to run:
    ``use_cut_points`` alone delegates to ``self.bucketize_age`` and
    ``use_quantiles`` alone delegates to ``self.quantize_age``. When both
    flags or neither flag is truthy, the plain copy is returned.
    """
    df = input_df.copy()
    if use_cut_points and not use_quantiles:
        df = self.bucketize_age(df)
    elif use_quantiles and not use_cut_points:
        df = self.quantize_age(df)
    return df


def quantize_age(self, input_df):
    """Return a copy of ``input_df`` with ``Age`` binned into quartiles.

    The new ``Age_categories`` column holds the integer quartile codes
    produced by ``pd.qcut`` with ``q=4`` and ``labels=False``. The input
    frame is left untouched.
    """
    result = input_df.copy()
    quartile_codes = pd.qcut(result.Age, q=4, labels=False)
    result["Age_categories"] = quartile_codes
    return result


def bucketize_age(self, input_df):
    """Return a copy of ``input_df`` with ``Age`` mapped to named age bands.

    Each band is written as ``(upper edge, label)``; the lowest band starts
    at -1. ``pd.cut`` writes the labels into a new ``Age_categories`` column.
    The input frame is left untouched.
    """
    age_bands = (
        (0, "Missing"),
        (5, "Infant"),
        (12, "Child"),
        (18, "Teenager"),
        (35, "Young Adult"),
        (60, "Adult"),
        (100, "Senior"),
    )
    # Rebuild the two parallel sequences pd.cut expects from the band table.
    bin_edges = [-1, *(upper for upper, _ in age_bands)]
    band_labels = [label for _, label in age_bands]

    result = input_df.copy()
    result["Age_categories"] = pd.cut(result["Age"], bin_edges, labels=band_labels)
    return result

Dedentación e indentación#

[ ]:

#
# SQL query hugging the left of the editor
#
def get_data_from_db():
    """Bind the SQL text to ``query`` and then call ``part_of_the_function()``.

    The triple-quoted literal is written flush with the left margin, so the
    SQL lines sit at column 0 even though they belong to this function body.
    """
    query = """
SELECT feature_1, feature_2
FROM products
WHERE feature_3 IN
(
    SELECT feature_4
    FROM orders
    INNER JOIN promotions
    ON orders.product_id = promotions.product_id
)
"""
    # Still inside the function body, despite the flush-left SQL above.
    part_of_the_function()


# Module-level call: this statement runs outside get_data_from_db.
not_part_of_the_function()

[ ]:

from textwrap import dedent


#
# SQL query with extra spaces.
# If the more readable query creates issues with the DB,
# then we dedent the query to remove the extra indentation.
#
def get_data_from_db():
    """Bind SQL text to ``query`` from a literal indented with the code.

    ``textwrap.dedent`` strips the whitespace common to every line of the
    literal, so the SQL can follow the function's indentation in the editor.

    NOTE(review): ``query`` is assigned but neither used nor returned in this
    excerpt.
    """
    query = dedent(
        """
        SELECT feature_1 , feature_2
        FROM products
        WHERE feature_3 IN
        (
            SELECT feature_4
            FROM orders
            INNER JOIN promotions
            ON orders.product_id = promotions.product_id
            )
    """
    )
    # NOTE(review): the closing ")" of the subquery is indented deeper than
    # its opening "(", and there is a stray space before the comma in the
    # SELECT list; both are inside the string literal.