Source code for techminer2.operations.tokenize_column
# flake8: noqa
# pylint: disable=invalid-name
# pylint: disable=line-too-long
# pylint: disable=missing-docstring
# pylint: disable=too-many-arguments
# pylint: disable=too-many-locals
# pylint: disable=too-many-statements
"""
Tokenize Column
===============================================================================
Example:
>>> import shutil
>>> shutil.copy("examples/fintech/database.csv.zip", "examples/fintech/data/processed/database.csv.zip")
'examples/fintech/data/processed/database.csv.zip'
>>> from techminer2.database.operators import TokenizeOperator
>>> # Creates, configure, and run the tokenize_
>>> (
... TokenizeOperator()
... #
... # FIELDS:
... .with_field("raw_abstract")
... .with_other_field("tokenized_raw_abstract")
... #
... # DATABASE:
... .where_root_directory("examples/fintech/")
... #
... .run()
... )
>>> # Query the database to test the TokenizeOperator
>>> from techminer2.io import Query
>>> df = (
... Query()
... .with_query_expression("SELECT tokenized_raw_abstract FROM database LIMIT 10;")
... .where_root_directory("examples/fintech/")
... .where_database("main")
... .where_record_years_range(None, None)
... .where_record_citations_range(None, None)
... .run()
... )
>>> import textwrap
>>> print(textwrap.fill(df.values[1][0], width=90))
the rapid development of information and communications technology is transforming the
entire industry landscape , heralding a new era of convergence services . as one of the
developing countries in the financial sector , china is experiencing an unprecedented
level of convergence between finance and technology . this study applies the lens of actor
network theory ( ant ) to conduct a multi level analysis of the historical development of
china ' s financial technology ( fintech ) industry . it attempts to elucidate the process
of building and disrupting a variety of networks comprising heterogeneous actors involved
in the newly emerging convergence industry . this research represents a stepping stone in
exploring the interaction between fintech and its yet unfolding social and political
context . it also discusses policy implications for china ' s fintech industry , focusing
on the changing role of the state in fostering the growth of national industry within and
outside of china . 2015 elsevier ltd .
>>> # Deletes the field
>>> from techminer2.database.operators import DeleteOperator
>>> field_deleter = (
... DeleteOperator()
... .with_field("tokenized_raw_abstract")
... .where_root_directory("examples/fintech/")
... )
>>> field_deleter.run()
"""
from techminer2._internals.mixins import ParamsMixin
from techminer2.io._internals.operations.tokenize_column import tokenize_column
from techminer2.text.extract._helpers.protected_fields import PROTECTED_FIELDS
[docs]
class TokenizeColumn(
ParamsMixin,
):
[docs]
def run(self) -> int:
if self.params.source_field == self.params.target_field:
raise ValueError(
f"Source and target fields must differ (got `{self.params.source_field}`)"
)
if self.params.target_field in PROTECTED_FIELDS:
raise ValueError(
f"Cannot overwrite protected field `{self.params.target_field}`"
)
return tokenize_column(
source=self.params.source_field,
target=self.params.target_field,
root_directory=self.params.root_directory,
)
#