Testing Automático#
Última modificación: Mayo 14, 2022
Archivos de ejemplo#
[ ]:
%%writefile /tmp/example.csv
"A","B","C"
"x","foo",20
"x","foo",30
"y","foo",10
"y","bar",20
"z","bar",10
"z","bar",10
[ ]:
Pytest#
Pandas
#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
Missing,
Extra,
Invalid,
Deviation,
)
@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Session-scoped fixture that reads ``example.csv`` into a DataFrame.

    The loader is wrapped with ``dt.working_directory(__file__)``;
    presumably this makes the relative path resolve against this file's
    directory (see the datatest documentation to confirm).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv('example.csv')
    return frame
@pytest.mark.mandatory
def test_column_names(df):
    """Validate the DataFrame's column labels against the set A, B, C."""
    expected_columns = {'A', 'B', 'C'}
    dt.validate(df.columns, expected_columns)
def test_a(df):
    """Validate the values of column 'A' against the set x, y, z."""
    allowed_values = {'x', 'y', 'z'}
    dt.validate(df['A'], allowed_values)


# ...add more tests here...
if __name__ == '__main__':
    # When executed as a script, pass the command line to pytest and use
    # the value it returns as the process exit status.
    import sys

    exit_status = pytest.main(sys.argv)
    sys.exit(exit_status)
Pandas (Integrated)
#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
Missing,
Extra,
Invalid,
Deviation,
)
@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Session-scoped fixture that reads ``example.csv`` into a DataFrame.

    The loader is wrapped with ``dt.working_directory(__file__)``;
    presumably this makes the relative path resolve against this file's
    directory (see the datatest documentation to confirm).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    loaded = pd.read_csv('example.csv')
    return loaded
@pytest.fixture(scope='session', autouse=True)
def pandas_integration():
    """Auto-used, session-scoped fixture that calls ``dt.register_accessors()``.

    The tests in this script call ``.validate(...)`` directly on pandas
    objects; presumably that accessor is what this registration provides
    (see the datatest documentation to confirm).
    """
    dt.register_accessors()
@pytest.mark.mandatory
def test_column_names(df):
    """Validate the column labels against A, B, C via the ``validate`` accessor."""
    expected_columns = {'A', 'B', 'C'}
    df.columns.validate(expected_columns)
def test_a(df):
    """Validate column 'A' against x, y, z via the ``validate`` accessor."""
    allowed_values = {'x', 'y', 'z'}
    df['A'].validate(allowed_values)


# ...add more tests here...
if __name__ == '__main__':
    # When executed as a script, pass the command line to pytest and use
    # the value it returns as the process exit status.
    import sys

    outcome = pytest.main(sys.argv)
    sys.exit(outcome)
SQL
#!/usr/bin/env python3
import pytest
import sqlite3
from datatest import (
validate,
accepted,
working_directory,
Missing,
Extra,
Invalid,
Deviation,
)
@pytest.fixture(scope='session')
def connection():
    """Session-scoped fixture yielding a connection to ``example.sqlite3``.

    The connection is opened inside ``working_directory(__file__)``
    (presumably so the relative path resolves against this file's
    directory -- confirm in the datatest documentation) and is closed
    once the fixture resumes after the ``yield``.
    """
    with working_directory(__file__):
        db = sqlite3.connect('example.sqlite3')
    yield db
    db.close()
@pytest.fixture(scope='function')
def cursor(connection):
    """Function-scoped fixture yielding a new cursor on ``connection``.

    The cursor is closed once the fixture resumes after the ``yield``.
    """
    db_cursor = connection.cursor()
    yield db_cursor
    db_cursor.close()
@pytest.mark.mandatory
def test_column_names(cursor):
    """Validate the column names of ``mytable`` against the set A, B, C."""
    # LIMIT 0 returns no rows; the query is only run so that
    # cursor.description gets populated.
    cursor.execute('SELECT * FROM mytable LIMIT 0;')
    # The first element of each description entry is the column name.
    found_names = [name for name, *_ in cursor.description]
    expected_names = {'A', 'B', 'C'}
    validate(found_names, expected_names)
def test_a(cursor):
    """Validate the values of column A in ``mytable`` against x, y, z."""
    allowed_values = {'x', 'y', 'z'}
    cursor.execute('SELECT A FROM mytable;')
    # The cursor itself is handed to validate() as the data under test.
    validate(cursor, allowed_values)


# ...add more tests here...
if __name__ == '__main__':
    # When executed as a script, pass the command line to pytest and use
    # the value it returns as the process exit status.
    import sys

    return_code = pytest.main(sys.argv)
    sys.exit(return_code)
Unittest#
from datatest import DataTestCase, Extra
class TestMyData(DataTestCase):
    """Example test case that validates data inside ``self.accepted(Extra)``."""

    def test_one(self):
        # 'C' and 'D' are not part of the requirement set; the assertion
        # runs inside the accepted(Extra) context.
        values = ['A', 'B', 'C', 'D']
        required = {'A', 'B'}
        with self.accepted(Extra):
            self.assertValid(values, required)
from datatest import DataTestCase, mandatory
class TestMyData(DataTestCase):
    """Example test case whose only test is decorated with ``@mandatory``."""

    @mandatory
    def test_one(self):
        # Every element of the list is a member of the requirement set.
        values = ['A', 'A', 'B', 'B']
        required = {'A', 'B'}
        self.assertValid(values, required)
$ python3 -m datatest
import unittest
from datatest import DataTestCase
class TestMyData(DataTestCase):
    """Example test case whose only test is skipped via ``unittest.skip``."""

    @unittest.skip('Data not yet collected.')
    def test_one(self):
        # Both values are Ellipsis placeholders left to be filled in.
        values = ...
        required = ...
        self.assertValid(values, required)
Pandas
#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
Missing,
Extra,
Invalid,
Deviation,
)
@dt.working_directory(__file__)
def setUpModule():
    """Read ``example.csv`` into the module-level global ``df``.

    Wrapped with ``dt.working_directory(__file__)``; presumably this makes
    the relative path resolve against this file's directory (see the
    datatest documentation to confirm).
    """
    global df

    df = pd.read_csv('example.csv')
class TestMyData(dt.DataTestCase):
    """Validations run against the module-level DataFrame ``df``."""

    @dt.mandatory
    def test_column_names(self):
        # The column labels are checked against the set A, B, C.
        expected_columns = {'A', 'B', 'C'}
        self.assertValid(df.columns, expected_columns)

    def test_a(self):
        # The values of column 'A' are checked against the set x, y, z.
        allowed_values = {'x', 'y', 'z'}
        self.assertValid(df['A'], allowed_values)

    # ...add more tests here...
if __name__ == '__main__':
    # When executed as a script, start datatest's own test runner.
    from datatest import main as run_tests

    run_tests()
Pandas (Integrated)
#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
Missing,
Extra,
Invalid,
Deviation,
)
@dt.working_directory(__file__)
def setUpModule():
    """Read ``example.csv`` into the global ``df`` and register accessors.

    Wrapped with ``dt.working_directory(__file__)``; presumably this makes
    the relative path resolve against this file's directory (see the
    datatest documentation to confirm).
    """
    global df

    df = pd.read_csv('example.csv')
    # Register pandas accessors.
    dt.register_accessors()
class TestMyData(dt.DataTestCase):
    """Validations on the module-level ``df`` using the ``validate`` accessor."""

    @dt.mandatory
    def test_column_names(self):
        # The column labels are checked against the set A, B, C.
        expected_columns = {'A', 'B', 'C'}
        df.columns.validate(expected_columns)

    def test_a(self):
        # The values of column 'A' are checked against the set x, y, z.
        allowed_values = {'x', 'y', 'z'}
        df['A'].validate(allowed_values)

    # ...add more tests here...
if __name__ == '__main__':
    # When executed as a script, start datatest's own test runner.
    from datatest import main as run_tests

    run_tests()
SQL
#!/usr/bin/env python3
import sqlite3
from datatest import (
DataTestCase,
mandatory,
working_directory,
Missing,
Extra,
Invalid,
Deviation,
)
@working_directory(__file__)
def setUpModule():
    """Open ``example.sqlite3`` and store the connection in the global ``connection``.

    Wrapped with ``working_directory(__file__)``; presumably this makes
    the relative path resolve against this file's directory (see the
    datatest documentation to confirm).
    """
    global connection

    connection = sqlite3.connect('example.sqlite3')
def tearDownModule():
    """Close the module-level ``connection``."""
    connection.close()
class MyTest(DataTestCase):
    """Validations run against ``mytable`` through the module-level connection."""

    def setUp(self):
        # Each test gets its own cursor, closed again through addCleanup.
        self.cursor = connection.cursor()
        self.addCleanup(self.cursor.close)

    @mandatory
    def test_column_names(self):
        # LIMIT 0 returns no rows; the query is only run so that
        # cursor.description gets populated.
        self.cursor.execute('SELECT * FROM mytable LIMIT 0;')
        # The first element of each description entry is the column name.
        found_names = [name for name, *_ in self.cursor.description]
        expected_names = {'A', 'B', 'C'}
        self.assertValid(found_names, expected_names)

    def test_a(self):
        # The cursor itself is handed to assertValid() as the data under test.
        allowed_values = {'x', 'y', 'z'}
        self.cursor.execute('SELECT A FROM mytable;')
        self.assertValid(self.cursor, allowed_values)
if __name__ == '__main__':
    # When executed as a script, start datatest's own test runner.
    from datatest import main as run_tests

    run_tests()