Validación de objetos de Pandas#

  • Última modificación: Mayo 14, 2022

DataFrame#

Default Index

[1]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

# Register datatest's pandas accessors so DataFrame objects expose `.validate`.
dt.register_accessors()

# Two columns and no explicit index, so pandas assigns the default integer index.
column_data = {
    "A": ["foo", "bar", "baz", "qux"],
    "B": [10, 20, "x", "y"],
}
df = pd.DataFrame(data=column_data)

# One (A, B) tuple per row, listed in the same order as the rows of `df`.
requirement = [("foo", 10), ("bar", 20), ("baz", "x"), ("qux", "y")]

df.validate(requirement)
Overwriting /tmp/validation_example.py
[2]:
!python3 /tmp/validation_example.py
[3]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt
# Register datatest's pandas accessors so `df.validate` is available.
dt.register_accessors()
# Column "B" mixes ints (10, 20) with strings ("x", "y").
df = pd.DataFrame(data={"A": ["foo", "bar", "baz", "qux"], "B": [10, 20, "x", "y"]})
# Require every row to be a (str, int) pair. NOTE: keep this call on line 11 of
# the written file; the traceback recorded for this cell cites that line.
df.validate((str, int))
Overwriting /tmp/validation_example.py
[4]:
!python3 /tmp/validation_example.py
Traceback (most recent call last):
  File "/tmp/validation_example.py", line 11, in <module>
    df.validate((str, int))
datatest.ValidationError: does not satisfy `(str, int)` (2 differences): [
    Invalid(('baz', 'x')),
    Invalid(('qux', 'y')),
]

Specified Index

[5]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

# Register datatest's pandas accessors so DataFrame objects expose `.validate`.
dt.register_accessors()

# Two columns whose rows carry explicit labels instead of the default index.
row_labels = ["I", "II", "III", "IV"]
column_data = {
    "A": ["foo", "bar", "baz", "qux"],
    "B": [10, 20, "x", "y"],
}
df = pd.DataFrame(data=column_data, index=row_labels)

# With labelled rows the requirement is a mapping: index label -> expected (A, B) tuple.
requirement = {
    "I": ("foo", 10),
    "II": ("bar", 20),
    "III": ("baz", "x"),
    "IV": ("qux", "y"),
}

df.validate(requirement)
Overwriting /tmp/validation_example.py
[6]:
!python3 /tmp/validation_example.py
[7]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt
# Register datatest's pandas accessors so `df.validate` is available.
dt.register_accessors()
# Rows are labelled "I".."IV"; column "B" mixes ints with strings.
df = pd.DataFrame(
    data={"A": ["foo", "bar", "baz", "qux"], "B": [10, 20, "x", "y"]},
    index=["I", "II", "III", "IV"],
)
# Require (str, int) rows. Keep on line 13 of the file: the recorded traceback cites it.
df.validate((str, int))
Overwriting /tmp/validation_example.py
[8]:
!python3 /tmp/validation_example.py
Traceback (most recent call last):
  File "/tmp/validation_example.py", line 13, in <module>
    df.validate((str, int))
datatest.ValidationError: does not satisfy `(str, int)` (2 differences): {
    'III': Invalid(('baz', 'x')),
    'IV': Invalid(('qux', 'y')),
}

Series#

Default Index

[9]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

# Register datatest's pandas accessors so Series objects expose `.validate`.
dt.register_accessors()

# Mixed int/str values; no index is given, so pandas uses the default one.
values = [10, 20, "x", "y"]
s = pd.Series(data=values)

# Expected values, listed in the same order as the elements of `s`.
requirement = [10, 20, "x", "y"]

s.validate(requirement)
Overwriting /tmp/validation_example.py
[10]:
!python3 /tmp/validation_example.py
[11]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt
# Register datatest's pandas accessors so `s.validate` is available.
dt.register_accessors()
# The series mixes ints (10, 20) with strings ("x", "y").
s = pd.Series(data=[10, 20, "x", "y"])
# Require int elements. Keep on line 10 of the file: the recorded traceback cites it.
s.validate(int)
Overwriting /tmp/validation_example.py
[12]:
!python3 /tmp/validation_example.py
Traceback (most recent call last):
  File "/tmp/validation_example.py", line 10, in <module>
    s.validate(int)
datatest.ValidationError: does not satisfy `int` (2 differences): [
    Invalid('x'),
    Invalid('y'),
]

Specified Index

[13]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

# Register datatest's pandas accessors so Series objects expose `.validate`.
dt.register_accessors()

# Mixed int/str values, each carrying an explicit label.
s = pd.Series(
    data=[10, 20, "x", "y"],
    index=["I", "II", "III", "IV"],
)

# With labelled elements the requirement is a mapping: index label -> expected value.
requirement = {
    "I": 10,
    "II": 20,
    "III": "x",
    "IV": "y",
}

s.validate(requirement)
Overwriting /tmp/validation_example.py
[14]:
!python3 /tmp/validation_example.py
[15]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

dt.register_accessors()
# Elements are labelled 'I'..'IV'; the values mix ints with strings.
s = pd.Series(data=[10, 20, 'x', 'y'],
              index=['I', 'II', 'III', 'IV'])
# Require int elements. Keep on line 11 of the file: the recorded traceback cites it.
s.validate(int)
Overwriting /tmp/validation_example.py
[16]:
!python3 /tmp/validation_example.py
Traceback (most recent call last):
  File "/tmp/validation_example.py", line 11, in <module>
    s.validate(int)
datatest.ValidationError: does not satisfy `int` (2 differences): {
    'III': Invalid('x'),
    'IV': Invalid('y'),
}

Index & MultiIndex#

[17]:
%%writefile /tmp/validation_example.py

import pandas as pd

import datatest as dt

# Register datatest's pandas accessors so Index and MultiIndex objects expose `.validate`.
dt.register_accessors()

# Flat index: the requirement lists the expected labels in order.
index = pd.Index(["I", "II", "III", "IV"])
index_requirement = ["I", "II", "III", "IV"]
index.validate(index_requirement)

# MultiIndex: each entry is a tuple with one label per level.
level_pairs = [("I", "a"), ("II", "b"), ("III", "c"), ("IV", "d")]
multi = pd.MultiIndex.from_tuples(level_pairs)
multi_requirement = [("I", "a"), ("II", "b"), ("III", "c"), ("IV", "d")]
multi.validate(multi_requirement)
Overwriting /tmp/validation_example.py
[18]:
!python3 /tmp/validation_example.py