Estilo#
Última modificación: Mayo 14, 2022
Los comentarios son más difíciles de mantener que el código#
[ ]:
#
# En esta función el código cambió pero los comentarios no.
#
def append_titles(self, input_df, extract_titles):
    """Return a copy of ``input_df``, optionally with a ``Title`` column.

    When ``extract_titles`` is truthy, the title is the first capitalised
    word in ``Name`` that is preceded by a space and followed by a period
    (for example ``"Mr"`` in ``"Braund, Mr. Owen"``). ``Mlle`` and ``Ms``
    are folded into ``Miss``, ``Mme`` into ``Mrs``, and a fixed list of
    rare titles is replaced by ``"Special"``. Every other title is kept
    exactly as extracted.

    Args:
        self: Not used by this method.
        input_df: DataFrame with a ``Name`` column. It is never modified.
        extract_titles: Whether to add the ``Title`` column.

    Returns:
        A copy of ``input_df``. A name without a matching title gets a
        missing value in ``Title`` instead of raising an error.
    """
    df = input_df.copy()
    if extract_titles:
        # NOTE: the next two comment lines are kept verbatim because they are
        # this section's example of a stale comment. They no longer describe
        # the code: common titles are kept as extracted and only the rare
        # titles listed below become "Special".
        # Extract title Mrs and Mr from the title,
        # and set everything else to special

        # str.extract returns a missing value when the pattern does not
        # match, so a name without a title no longer aborts the whole call.
        df["Title"] = df["Name"].str.extract(r" ([A-Z][a-z]+)\.", expand=False)
        df["Title"] = df["Title"].replace({"Mlle": "Miss", "Mme": "Mrs", "Ms": "Miss"})
        df["Title"] = df["Title"].replace(
            [
                "Don",
                "Dona",
                "Rev",
                "Dr",
                "Major",
                "Lady",
                "Sir",
                "Col",
                "Capt",
                "Countess",
                "Jonkheer",
            ],
            "Special",
        )
    return df
No esconda mal código detrás de comentarios#
El código explica qué se hace.
Los comentarios deben explicar por qué se hace.
El código debe ser auto-explicativo#
[ ]:
# Before: the raw condition does not say what it means, so it needs the
# comment below to explain it.
# Check to see if the model finished training because of convergence
if model.train_epoch != MAX_EPOCH and model.last_loss_difference <= MIN_LOSS_DIFF:
    pass

# After: the intent is carried by the method name and no comment is needed.
# NOTE(review): training_converged is not defined in this excerpt; presumably
# it wraps the condition shown above. Confirm against the model class.
if model.training_converged(MAX_EPOCH, MIN_LOSS_DIFF):
    pass
[ ]:
def train(input_data, batch_size=100):
    """Placeholder for training the model in a streaming fashion.

    The body is empty in this example: both arguments are ignored and the
    call returns ``None``.
    """
def train_from_streaming_data(input_data, batch_size=100):
    """Placeholder whose name states the intent: train from streaming data.

    The body is empty in this example: both arguments are ignored and the
    call returns ``None``.
    """
[ ]:
def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """Return a copy of ``input_df`` with an optional ``Age_categories`` column.

    The column is added only when exactly one of the two flags is truthy:

    * ``use_cut_points`` only: ``Age`` is bucketed with ``pd.cut`` into the
      seven labelled intervals defined below.
    * ``use_quantiles`` only: ``Age`` is split with ``pd.qcut`` into four
      quantile bins, stored as integer codes (``labels=False``).

    If both flags or neither flag is truthy, the copy is returned unchanged.

    Args:
        self: Not used by this method.
        input_df: DataFrame with an ``Age`` column. It is never modified.
        use_cut_points: Select the fixed cut-point bucketing.
        use_quantiles: Select the quantile bucketing.

    Returns:
        A copy of ``input_df``, with ``Age_categories`` when one mode applies.
    """
    df = input_df.copy()
    if use_cut_points and not use_quantiles:
        cut_points = [-1, 0, 5, 12, 18, 35, 60, 100]
        # Eight edges define seven bins, so pd.cut needs exactly seven
        # labels. "Child" and "Teenager" must be separate items: without the
        # comma Python concatenates them into one label and pd.cut rejects
        # the list.
        label_names = [
            "Missing",
            "Infant",
            "Child",
            "Teenager",
            "Young Adult",
            "Adult",
            "Senior",
        ]
        df["Age_categories"] = pd.cut(df["Age"], cut_points, labels=label_names)
    elif use_quantiles and not use_cut_points:
        df["Age_categories"] = pd.qcut(df.Age, q=4, labels=False)
    return df
def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """Return a categorised copy of ``input_df``, chosen by the two flags.

    Exactly one flag must be truthy for a categorisation to happen:
    ``use_cut_points`` alone delegates to ``self.bucketize_age`` and
    ``use_quantiles`` alone delegates to ``self.quantize_age``. With both
    flags or neither flag set, the plain copy is returned.
    """
    frame = input_df.copy()
    if use_cut_points and use_quantiles:
        # Both modes requested at once: neither is applied.
        return frame
    if use_cut_points:
        return self.bucketize_age(frame)
    if use_quantiles:
        return self.quantize_age(frame)
    return frame
Comentarios inútiles#
[ ]:
def load_features_from_file(self):
    """Read the features CSV located at ``self.features_filepath``.

    Args:
        self: Object exposing a ``features_filepath`` attribute. The
            parameter is required because the body reads ``self``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.

    Raises:
        Exception: If the file cannot be read. The triggering error is
            attached as ``__cause__``.
    """
    try:
        return pandas.read_csv(self.features_filepath)
    except Exception as err:
        # No features_filepath means remote not initialized
        raise Exception("Unable to load features from file") from err
[ ]:
def upload_new_model_to_repository(self, input_model, repository, timeout):
    """Create a repository location for ``input_model`` and upload it there.

    Args:
        input_model: Model to upload; only its ``uuid`` is read here.
        repository: Object providing ``create_new_model_location`` and
            ``is_model_location_set``.
        timeout: Accepted for interface compatibility; not used by this
            method.

    Raises:
        Exception: ``"Unable to upload model to repository"`` for any
            failure, including the location not being set after creation.
            The underlying error is available as ``__cause__``.
    """
    try:
        repository.create_new_model_location(input_model.uuid)
        if not repository.is_model_location_set(input_model.uuid):
            raise Exception("Repository not available")
        self.upload_model_to_repository(input_model, repository)
    except Exception as err:
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate instead of being
        # reported as upload failures.
        raise Exception("Unable to upload model to repository") from err
Use PEP-8#
El código en el material de los cursos fue formateado con:
isort
black
Organización del código en el módulo#
Se recomienda empezar por lo más general y colocar las funciones en el orden en que se llaman.
[ ]:
def append_age_categories(self, input_df, use_cut_points, use_quantiles):
    """Return a categorised copy of ``input_df``, chosen by the two flags.

    Exactly one flag must be truthy for a categorisation to happen:
    ``use_cut_points`` alone delegates to ``self.bucketize_age`` and
    ``use_quantiles`` alone delegates to ``self.quantize_age``. With both
    flags or neither flag set, the plain copy is returned.
    """
    frame = input_df.copy()
    if use_cut_points and use_quantiles:
        # Both modes requested at once: neither is applied.
        return frame
    if use_cut_points:
        return self.bucketize_age(frame)
    if use_quantiles:
        return self.quantize_age(frame)
    return frame
def quantize_age(self, input_df):
    """Return a copy of ``input_df`` with ``Age`` split into four quantile bins.

    The bins come from ``pd.qcut`` with ``q=4`` and ``labels=False``, so
    ``Age_categories`` holds integer bin codes. ``input_df`` is not modified.
    """
    frame = input_df.copy()
    quartile_codes = pd.qcut(frame.Age, q=4, labels=False)
    return frame.assign(Age_categories=quartile_codes)
def bucketize_age(self, input_df):
    """Return a copy of ``input_df`` with ``Age`` bucketed into named bands.

    ``Age_categories`` is produced by ``pd.cut`` over eight edges starting
    at -1, which gives the seven labelled bins listed below. ``input_df``
    is not modified.
    """
    # Each label is paired with the upper edge of its bin; the lowest bin
    # starts at -1.
    upper_edge_by_label = {
        "Missing": 0,
        "Infant": 5,
        "Child": 12,
        "Teenager": 18,
        "Young Adult": 35,
        "Adult": 60,
        "Senior": 100,
    }
    edges = [-1, *upper_edge_by_label.values()]
    frame = input_df.copy()
    frame["Age_categories"] = pd.cut(
        frame["Age"], edges, labels=list(upper_edge_by_label)
    )
    return frame
Dedentación e indentación#
[ ]:
#
# SQL query hugging the left of the editor
#
def get_data_from_db():
    # Counter-example for this section: the SQL text below starts at the
    # left margin, so it visually interrupts the function body and makes it
    # hard to see where the function ends.
    # The query is only assigned to a local name here; this excerpt does not
    # execute or return it.
    query = """
SELECT feature_1, feature_2
FROM products
WHERE feature_3 IN
(
SELECT feature_4
FROM orders
INNER JOIN promotions
ON orders.product_id = promotions.product_id
)
"""
    part_of_the_function()


# NOTE(review): presumably a module-level call outside get_data_from_db, as
# its name says; the original indentation is not visible in this excerpt.
not_part_of_the_function()
[ ]:
from textwrap import dedent
#
# SQL query with extra spaces.
# If the more readable query creates issues with the DB,
# then we dedent the query to remove the extra indentation.
#
def get_data_from_db():
query = dedent(
"""
SELECT feature_1 , feature_2
FROM products
WHERE feature_3 IN
(
SELECT feature_4
FROM orders
INNER JOIN promotions
ON orders.product_id = promotions.product_id
)
"""
)