Verificación de rangos de fecha
Última actualización: Mar 6, 2023 | YouTube
[1]:
import pandas as pd
pd.__version__
[1]:
'1.5.2'
[2]:
%%writefile /tmp/data.csv
eventId,eventDate
1,2012-01-10
2,1900-12-23
3,2018-09-17
4,2019-11-15
5,2020-04-23
6,2025-07-03
7,2020-02-17
8,2017-08-12
9,2015-06-24
Overwriting /tmp/data.csv
[3]:
# Load the sample events file. No date parsing is requested here, so
# 'eventDate' is read as text (object dtype), as the dtypes listing shows.
df = pd.read_csv(filepath_or_buffer="/tmp/data.csv")
display(df, df.dtypes)
eventId | eventDate | |
---|---|---|
0 | 1 | 2012-01-10 |
1 | 2 | 1900-12-23 |
2 | 3 | 2018-09-17 |
3 | 4 | 2019-11-15 |
4 | 5 | 2020-04-23 |
5 | 6 | 2025-07-03 |
6 | 7 | 2020-02-17 |
7 | 8 | 2017-08-12 |
8 | 9 | 2015-06-24 |
eventId int64
eventDate object
dtype: object
[4]:
#
# Convert the 'eventDate' column from text to a datetime dtype
#
df["eventDate"] = pd.to_datetime(df["eventDate"])
display(df, df.dtypes)
eventId | eventDate | |
---|---|---|
0 | 1 | 2012-01-10 |
1 | 2 | 1900-12-23 |
2 | 3 | 2018-09-17 |
3 | 4 | 2019-11-15 |
4 | 5 | 2020-04-23 |
5 | 6 | 2025-07-03 |
6 | 7 | 2020-02-17 |
7 | 8 | 2017-08-12 |
8 | 9 | 2015-06-24 |
eventId int64
eventDate datetime64[ns]
dtype: object
Rango de fechas válido:
- Límite inferior: 1950-01-01
- Límite superior: fecha actual
[5]:
import datetime as dt

# Upper bound of the valid range: the current calendar day as a pandas
# Timestamp at midnight, so it can be compared against the datetime column.
current_day = dt.date.today()
today = pd.to_datetime(current_day)
today
[5]:
Timestamp('2022-12-09 00:00:00')
[6]:
#
# Constraint check: list the rows whose date falls outside the valid range
#
min_valid_date = pd.to_datetime("1950-01-01")
before_min = df.eventDate < min_valid_date
after_today = df.eventDate > today
df[before_min | after_today]
[6]:
eventId | eventDate | |
---|---|---|
1 | 2 | 1900-12-23 |
5 | 6 | 2025-07-03 |
[7]:
#
# Clamp out-of-range dates to the nearest valid bound: anything earlier than
# 1950-01-01 becomes 1950-01-01 and anything later than `today` becomes `today`.
# Series.clip applies both bounds in one vectorised step, and the lower bound
# is given as a Timestamp (not a bare string), the same way the constraint
# check compares it.
#
df["eventDate"] = df["eventDate"].clip(
    lower=pd.to_datetime("1950-01-01"),
    upper=today,
)
display(df, df.dtypes)
eventId | eventDate | |
---|---|---|
0 | 1 | 2012-01-10 |
1 | 2 | 1950-01-01 |
2 | 3 | 2018-09-17 |
3 | 4 | 2019-11-15 |
4 | 5 | 2020-04-23 |
5 | 6 | 2022-12-09 |
6 | 7 | 2020-02-17 |
7 | 8 | 2017-08-12 |
8 | 9 | 2015-06-24 |
eventId int64
eventDate datetime64[ns]
dtype: object