Verificación de rangos de fecha

  • Última actualización: Mar 6, 2023 | YouTube

[1]:
import pandas as pd

# Echo the installed pandas version; as the last expression of the cell it is
# rendered as the cell's output.
pd.__version__
[1]:
'1.5.2'
[2]:
%%writefile /tmp/data.csv
eventId,eventDate
1,2012-01-10
2,1900-12-23
3,2018-09-17
4,2019-11-15
5,2020-04-23
6,2025-07-03
7,2020-02-17
8,2017-08-12
9,2015-06-24
Overwriting /tmp/data.csv
[3]:
# Load the sample events from the CSV file written by the previous cell.
df = pd.read_csv("/tmp/data.csv")

# Render the table first, then the per-column dtypes, so the type of
# 'eventDate' as loaded can be inspected before any conversion.
display(df)
display(df.dtypes)
eventId eventDate
0 1 2012-01-10
1 2 1900-12-23
2 3 2018-09-17
3 4 2019-11-15
4 5 2020-04-23
5 6 2025-07-03
6 7 2020-02-17
7 8 2017-08-12
8 9 2015-06-24
eventId       int64
eventDate    object
dtype: object
[4]:
#
# Convert the 'eventDate' column to a datetime dtype
#
df["eventDate"] = pd.to_datetime(df["eventDate"])

# Show the frame together with its dtypes to confirm the conversion.
display(df, df.dtypes)
eventId eventDate
0 1 2012-01-10
1 2 1900-12-23
2 3 2018-09-17
3 4 2019-11-15
4 5 2020-04-23
5 6 2025-07-03
6 7 2020-02-17
7 8 2017-08-12
8 9 2015-06-24
eventId               int64
eventDate    datetime64[ns]
dtype: object

Rango de fechas válido:

  • Límite inferior: 1950-01-01
  • Límite superior: fecha actual
[5]:
import datetime as dt

# Upper bound of the valid range: the current calendar date as a pandas
# Timestamp at midnight, so it can be compared against the datetime column.
today = pd.Timestamp(dt.date.today())
today
[5]:
Timestamp('2022-12-09 00:00:00')
[6]:
#
# Constraint check: list the rows whose date falls outside the valid range
#
earliest_allowed = pd.to_datetime("1950-01-01")
before_range = df["eventDate"] < earliest_allowed
after_range = df["eventDate"] > today

# A row is a violation when it is either too old or in the future.
df[before_range | after_range]
[6]:
eventId eventDate
1 2 1900-12-23
5 6 2025-07-03
[7]:
# Clamp every 'eventDate' into the valid range [1950-01-01, today].
#
# Series.clip replaces values below `lower` with `lower` and values above
# `upper` with `upper`, which is what the two separate masked assignments did
# by hand. The lower bound is built as an explicit Timestamp instead of a bare
# string, so the comparison no longer relies on pandas coercing the string.
min_event_date = pd.to_datetime("1950-01-01")

df["eventDate"] = df["eventDate"].clip(lower=min_event_date, upper=today)

# Show the corrected frame and confirm the column is still datetime64[ns].
display(df, df.dtypes)
eventId eventDate
0 1 2012-01-10
1 2 1950-01-01
2 3 2018-09-17
3 4 2019-11-15
4 5 2020-04-23
5 6 2022-12-09
6 7 2020-02-17
7 8 2017-08-12
8 9 2015-06-24
eventId               int64
eventDate    datetime64[ns]
dtype: object