Testing Automático#

  • Última modificación: Mayo 14, 2022

Archivos de ejemplo#

[ ]:
%%writefile /tmp/example.csv
"A","B","C"
"x","foo",20
"x","foo",30
"y","foo",10
"y","bar",20
"z","bar",10
"z","bar",10
[ ]:

Pytest#

Pandas

#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Session-scoped fixture that loads ``example.csv`` with pandas.

    The function is wrapped with ``dt.working_directory(__file__)`` so the
    relative file name is presumably resolved next to this test file
    (see the datatest documentation for the exact semantics).

    Returns:
        The object returned by ``pd.read_csv`` (a DataFrame).
    """
    frame = pd.read_csv('example.csv')
    return frame


@pytest.mark.mandatory
def test_column_names(df):
    """Validate the DataFrame's column labels against the set A, B, C."""
    dt.validate(df.columns, {'A', 'B', 'C'})


def test_a(df):
    """Validate the values in column ``A`` against the set x, y, z."""
    allowed_values = {'x', 'y', 'z'}
    column_a = df['A']
    dt.validate(column_a, allowed_values)


# ...add more tests here...


if __name__ == '__main__':
    # Run as a script: hand the command-line arguments to pytest and use
    # the value it returns as the process exit status.
    import sys
    exit_status = pytest.main(sys.argv)
    sys.exit(exit_status)

Pandas (Integrated)

#!/usr/bin/env python3
import pytest
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
@dt.working_directory(__file__)
def df():
    """Provide the contents of ``example.csv`` as a session-scoped fixture.

    ``dt.working_directory(__file__)`` wraps the function; presumably this
    makes the relative path resolve from this file's directory (confirm in
    the datatest documentation).

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    csv_data = pd.read_csv('example.csv')
    return csv_data


@pytest.fixture(scope='session', autouse=True)
def pandas_integration():
    """Autouse, session-scoped fixture that calls ``dt.register_accessors()``.

    The tests in this module call ``.validate(...)`` directly on pandas
    objects; presumably that accessor is what this registration provides.
    """
    dt.register_accessors()
    return None


@pytest.mark.mandatory
def test_column_names(df):
    """Validate the column labels against A, B, C via the ``validate`` accessor."""
    column_labels = df.columns
    column_labels.validate({'A', 'B', 'C'})


def test_a(df):
    """Validate column ``A`` against x, y, z via the ``validate`` accessor."""
    allowed_values = {'x', 'y', 'z'}
    column_a = df['A']
    column_a.validate(allowed_values)


# ...add more tests here...


if __name__ == '__main__':
    # Script entry point: run pytest with the command-line arguments and
    # exit with the status it returns.
    import sys
    status = pytest.main(sys.argv)
    sys.exit(status)

SQL

#!/usr/bin/env python3
import pytest
import sqlite3
from datatest import (
    validate,
    accepted,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@pytest.fixture(scope='session')
def connection():
    """Session-scoped fixture yielding a connection to ``example.sqlite3``.

    The connection is opened inside ``working_directory(__file__)`` and is
    closed once the fixture is torn down.

    Yields:
        The ``sqlite3`` connection object.
    """
    with working_directory(__file__):
        db = sqlite3.connect('example.sqlite3')
    yield db
    # Teardown: release the database handle.
    db.close()


@pytest.fixture(scope='function')
def cursor(connection):
    """Function-scoped fixture yielding a new cursor on ``connection``.

    Yields:
        A cursor created by ``connection.cursor()``; it is closed on teardown.
    """
    db_cursor = connection.cursor()
    yield db_cursor
    # Teardown: close the cursor handed to the test.
    db_cursor.close()


@pytest.mark.mandatory
def test_column_names(cursor):
    """Validate the column names of ``mytable`` against the set A, B, C."""
    # LIMIT 0 fetches no rows; only cursor.description is needed.
    cursor.execute('SELECT * FROM mytable LIMIT 0;')
    # The first element of each description entry is the column name.
    actual_names = [column[0] for column in cursor.description]
    validate(actual_names, {'A', 'B', 'C'})


def test_a(cursor):
    """Validate the values of column ``A`` in ``mytable`` against x, y, z."""
    allowed_values = {'x', 'y', 'z'}
    cursor.execute('SELECT A FROM mytable;')
    # The cursor itself is passed to validate() as the data.
    validate(cursor, allowed_values)


# ...add more tests here...


if __name__ == '__main__':
    # Direct execution: run pytest with the command-line arguments and
    # propagate its return value as the exit status.
    import sys
    result = pytest.main(sys.argv)
    sys.exit(result)

Unittest#

from datatest import DataTestCase, Extra

class TestMyData(DataTestCase):
    """Example case that validates data while accepting ``Extra`` differences."""

    def test_one(self):
        # 'C' and 'D' are not members of the required set; the assertion
        # runs inside an accepted(Extra) context.
        observed = ['A', 'B', 'C', 'D']
        allowed = {'A', 'B'}
        with self.accepted(Extra):
            self.assertValid(observed, allowed)
from datatest import DataTestCase, mandatory

class TestMyData(DataTestCase):
    """Example case whose single test is decorated with ``@mandatory``."""

    @mandatory
    def test_one(self):
        # Every element of the data is a member of the required set.
        observed = ['A', 'A', 'B', 'B']
        allowed = {'A', 'B'}
        self.assertValid(observed, allowed)
$ python3 -m datatest
import unittest
from datatest import DataTestCase

class TestMyData(DataTestCase):
    """Example case showing a test skipped with ``unittest.skip``."""

    @unittest.skip('Data not yet collected.')
    def test_one(self):
        # Ellipsis placeholders stand in for data that does not exist yet.
        observed = ...
        expected = ...
        self.assertValid(observed, expected)

Pandas

#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@dt.working_directory(__file__)
def setUpModule():
    """Load ``example.csv`` into the module-level ``df`` variable.

    Wrapped with ``dt.working_directory(__file__)``; presumably this makes
    the relative path resolve from this file's directory.
    """
    global df
    loaded = pd.read_csv('example.csv')
    df = loaded


class TestMyData(dt.DataTestCase):
    """Validation tests for the module-level DataFrame ``df``."""

    @dt.mandatory
    def test_column_names(self):
        # The column labels are checked against the set A, B, C.
        self.assertValid(df.columns, {'A', 'B', 'C'})

    def test_a(self):
        # The values of column 'A' are checked against the set x, y, z.
        allowed_values = {'x', 'y', 'z'}
        column_a = df['A']
        self.assertValid(column_a, allowed_values)

    # ...add more tests here...


if __name__ == '__main__':
    # Direct execution: start datatest's own command-line entry point.
    from datatest import main as run_datatest
    run_datatest()

Pandas (Integrated)

#!/usr/bin/env python3
import pandas as pd
import datatest as dt
from datatest import (
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@dt.working_directory(__file__)
def setUpModule():
    """Load ``example.csv`` into module-level ``df`` and register accessors.

    Wrapped with ``dt.working_directory(__file__)``; presumably this makes
    the relative path resolve from this file's directory.
    """
    global df
    loaded = pd.read_csv('example.csv')
    df = loaded
    # Register pandas accessors (used by the .validate(...) calls in the tests).
    dt.register_accessors()


class TestMyData(dt.DataTestCase):
    """Validation tests that call ``.validate(...)`` on the module-level ``df``."""

    @dt.mandatory
    def test_column_names(self):
        # The column labels are checked against the set A, B, C.
        column_labels = df.columns
        column_labels.validate({'A', 'B', 'C'})

    def test_a(self):
        # The values of column 'A' are checked against the set x, y, z.
        allowed_values = {'x', 'y', 'z'}
        column_a = df['A']
        column_a.validate(allowed_values)

    # ...add more tests here...


if __name__ == '__main__':
    # Script entry point: hand control to datatest's main().
    from datatest import main as run_datatest
    run_datatest()

SQL

#!/usr/bin/env python3
import sqlite3
from datatest import (
    DataTestCase,
    mandatory,
    working_directory,
    Missing,
    Extra,
    Invalid,
    Deviation,
)


@working_directory(__file__)
def setUpModule():
    """Open ``example.sqlite3`` and store the handle in module-level ``connection``.

    Wrapped with ``working_directory(__file__)``; presumably this makes the
    relative database path resolve from this file's directory.
    """
    global connection
    db = sqlite3.connect('example.sqlite3')
    connection = db


def tearDownModule():
    """Close the module-level ``connection``."""
    db = connection
    db.close()


class MyTest(DataTestCase):
    """Validation tests for ``mytable`` run through the module-level ``connection``."""

    def setUp(self):
        # Each test gets its own cursor, closed again through addCleanup.
        self.cursor = connection.cursor()
        self.addCleanup(self.cursor.close)

    @mandatory
    def test_column_names(self):
        # LIMIT 0 fetches no rows; only cursor.description is needed.
        self.cursor.execute('SELECT * FROM mytable LIMIT 0;')
        # The first element of each description entry is the column name.
        actual_names = [column[0] for column in self.cursor.description]
        self.assertValid(actual_names, {'A', 'B', 'C'})

    def test_a(self):
        # The values of column A are checked against the set x, y, z.
        allowed_values = {'x', 'y', 'z'}
        self.cursor.execute('SELECT A FROM mytable;')
        self.assertValid(self.cursor, allowed_values)


if __name__ == '__main__':
    # Executed as a script: run the tests through datatest's main().
    from datatest import main as run_datatest
    run_datatest()