Fill NA
Example
>>> # Copies the example database into the processed data directory
>>> import shutil
>>> shutil.copy("examples/fintech/database.csv.zip", "examples/fintech/data/processed/database.csv.zip")
'examples/fintech/data/processed/database.csv.zip'
>>> import pandas as pd
>>> from techminer2.database.operators import TransformOperator
>>> TransformOperator(
... field="raw_index_keywords",
... other_field="na_field",
... root_directory="examples/fintech/",
... transformation_function=lambda x: pd.NA,
... ).run()
>>> # Query the database to obtain the number of NA values
>>> from techminer2.database.tools import Query
>>> query = (
... Query()
... .with_query_expression("SELECT na_field FROM database;")
... .where_root_directory_is("examples/fintech/")
... .where_database_is("main")
... .where_record_years_range_is(None, None)
... .where_record_citations_range_is(None, None)
... )
>>> df = query.run()
>>> int(df.na_field.isna().sum())
50
>>> # Creates, configures, and runs the operator
>>> from techminer2.database.operators import FillNAOperator
>>> fillna_operator = (
... FillNAOperator()
... #
... # FIELDS:
... .with_field("na_field")
... .with_other_field("raw_index_keywords")
... #
... # DATABASE:
... .where_root_directory_is("examples/fintech/")
... )
>>> fillna_operator.run()
>>> # Query the database to test the operator
>>> from techminer2.database.tools import Query
>>> query = (
... Query()
... .with_query_expression("SELECT na_field FROM database;")
... .where_root_directory_is("examples/fintech/")
... .where_database_is("main")
... .where_record_years_range_is(None, None)
... .where_record_citations_range_is(None, None)
... )
>>> df = query.run()
>>> int(df.na_field.isna().sum())
31
>>> # Deletes the field
>>> from techminer2.database.operators import DeleteOperator
>>> DeleteOperator(
... field="na_field",
... root_directory="examples/fintech/",
... ).run()