Collect Nouns and PhrasesΒΆ

Example

>>> import shutil
>>> shutil.copy("examples/fintech/database.csv.zip", "examples/fintech/data/processed/database.csv.zip")
'examples/fintech/data/processed/database.csv.zip'
>>> # Creates, configure, and run the cleaner to prepare the field
>>> from techminer2.database.operators import TokenizeOperator
>>> (
...     TokenizeOperator()
...     #
...     # FIELDS:
...     .with_field("raw_abstract")
...     .with_other_field("cleaned_raw_abstract")
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
...     #
...     .run()
... )
>>> # Creates, configure, and run the operator
>>> from techminer2.database.operators import HighlightOperator
>>> (
...     HighlightOperator()
...     #
...     # FIELDS:
...     .with_field("cleaned_raw_abstract")
...     .with_other_field("highlighted_raw_abstract")
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
...     #
...     .run()
... )
>>> # Collect terms in upper case from the field
>>> from techminer2.database.operators import CollectOperator
>>> (
...     CollectOperator()
...     #
...     # FIELDS:
...     .with_field("highlighted_raw_abstract")
...     .with_other_field("extracted_nouns_and_phrases")
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
...     #
...     .run()
... )
>>> # Query the database to test the cleaner
>>> from techminer2.database.tools import Query
>>> df = (
...     Query()
...     .with_query_expression("SELECT extracted_nouns_and_phrases FROM database LIMIT 10;")
...     .where_root_directory_is("examples/fintech/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
...     .run()
... )
>>> import textwrap
>>> print(textwrap.fill(df.values[1][0], width=80))
THE_RAPID_DEVELOPMENT; INFORMATION_AND_COMMUNICATIONS_TECHNOLOGY;
THE_ENTIRE_INDUSTRY_LANDSCAPE; A_NEW_ERA; CONVERGENCE_SERVICES;
THE_DEVELOPING_COUNTRIES; THE_FINANCIAL_SECTOR; CHINA; AN_UNPRECEDENTED_LEVEL;
CONVERGENCE; FINANCE; TECHNOLOGY; THE_LENS; ACTOR_NETWORK_THEORY; ANT;
A_MULTI_LEVEL_ANALYSIS; THE_HISTORICAL_DEVELOPMENT; CHINA;
FINANCIAL_TECHNOLOGY_INDUSTRY; THE_PROCESS; BUILDING; A_VARIETY; NETWORKS;
HETEROGENEOUS_ACTORS; THE_NEWLY_EMERGING_CONVERGENCE_INDUSTRY; A_STEPPING_STONE;
THE_INTERACTION; FINTECH; SOCIAL_AND_POLITICAL_CONTEXT;
DISCUSSES_POLICY_IMPLICATIONS; CHINA_FINTECH_INDUSTRY; THE_CHANGING_ROLE;
THE_STATE; THE_GROWTH; NATIONAL_INDUSTRY; CHINA
>>> # Highlighted abstract:
>>> #   THE_RAPID_DEVELOPMENT of INFORMATION_AND_COMMUNICATIONS_TECHNOLOGY is
>>> #   transforming THE_ENTIRE_INDUSTRY_LANDSCAPE , heralding A_NEW_ERA of
>>> #   CONVERGENCE_SERVICES . as one of THE_DEVELOPING_COUNTRIES in
>>> #   THE_FINANCIAL_SECTOR , CHINA is experiencing AN_UNPRECEDENTED_LEVEL of
>>> #   CONVERGENCE between FINANCE and TECHNOLOGY . THIS_STUDY applies THE_LENS of
>>> #   ACTOR_NETWORK_THEORY ( ant ) to conduct A_MULTI_LEVEL_ANALYSIS of
>>> #   THE_HISTORICAL_DEVELOPMENT of CHINA FINANCIAL_TECHNOLOGY ( FINTECH ) INDUSTRY .
>>> #   it attempts to elucidate THE_PROCESS of BUILDING and disrupting A_VARIETY of
>>> #   NETWORKS comprising HETEROGENEOUS_ACTORS involved in
>>> #   THE_NEWLY_EMERGING_CONVERGENCE_INDUSTRY . THIS_RESEARCH represents
>>> #   A_STEPPING_STONE in exploring THE_INTERACTION between FINTECH and its yet
>>> #   unfolding SOCIAL_AND_POLITICAL_CONTEXT . it also DISCUSSES_POLICY_IMPLICATIONS
>>> #   for CHINA_FINTECH_INDUSTRY , focusing_on THE_CHANGING_ROLE of THE_STATE in
>>> #   fostering THE_GROWTH of NATIONAL_INDUSTRY within and outside_of CHINA . 2015
>>> #   ELSEVIER_LTD .
>>> # Deletes the fields
>>> from techminer2.database.operators import DeleteOperator
>>> field_deleter = (
...     DeleteOperator()
...     .where_root_directory_is("examples/fintech/")
... )
>>> field_deleter.with_field("cleaned_raw_abstract").run()
>>> field_deleter.with_field("highlighted_raw_abstract").run()
>>> field_deleter.with_field("extracted_nouns_and_phrases").run()