Fill NA

Example

>>> # Copy the original example database into the processed data folder
>>> import shutil
>>> shutil.copy("examples/fintech/database.csv.zip", "examples/fintech/data/processed/database.csv.zip")
'examples/fintech/data/processed/database.csv.zip'
>>> import pandas as pd
>>> from techminer2.database.operators import TransformOperator
>>> TransformOperator(
...     field="raw_index_keywords",
...     other_field="na_field",
...     root_directory="examples/fintech/",
...     transformation_function=lambda x: pd.NA,
... ).run()
>>> # Query the database to obtain the number of NA values
>>> from techminer2.database.tools import Query
>>> query = (
...     Query()
...     .with_query_expression("SELECT na_field FROM database;")
...     .where_root_directory_is("examples/fintech/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
... )
>>> df = query.run()
>>> int(df.na_field.isna().sum())
50
>>> # Creates, configures, and runs the operator
>>> from techminer2.database.operators import FillNAOperator
>>> fillna_operator = (
...     FillNAOperator()
...     #
...     # FIELDS:
...     .with_field("na_field")
...     .with_other_field("raw_index_keywords")
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
... )
>>> fillna_operator.run()
>>> # Query the database to test the operator
>>> from techminer2.database.tools import Query
>>> query = (
...     Query()
...     .with_query_expression("SELECT na_field FROM database;")
...     .where_root_directory_is("examples/fintech/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
... )
>>> df = query.run()
>>> int(df.na_field.isna().sum())
31
>>> # Deletes the field
>>> from techminer2.database.operators import DeleteOperator
>>> DeleteOperator(
...     field="na_field",
...     root_directory="examples/fintech/",
... ).run()