Apply Thesaurus

Example

>>> #
>>> # TEST PREPARATION
>>> #
>>> import sys
>>> from io import StringIO
>>> from techminer2.thesaurus.user import ApplyThesaurus, InitializeThesaurus
>>> # Redirecting stderr to avoid messages during doctests
>>> original_stderr = sys.stderr
>>> sys.stderr = StringIO()
>>> # Reset the thesaurus to initial state
>>> InitializeThesaurus(thesaurus_file="demo.the.txt", field="raw_descriptors",
...     root_directory="examples/fintech/", quiet=True).run()
>>> # Create, configure, and run the applier
>>> applier = (
...     ApplyThesaurus(use_colorama=False)
...     .with_thesaurus_file("descriptors.the.txt")
...     .with_field("raw_descriptors")
...     .with_other_field("descriptors_cleaned")
...     .where_root_directory_is("examples/fintech/")
... )
>>> applier.run()
>>> # Capture and print stderr output to test the code using doctest
>>> output = sys.stderr.getvalue()
>>> sys.stderr = original_stderr
>>> print(output)
Applying user thesaurus to database...
          File : examples/fintech/data/thesaurus/descriptors.the.txt
  Source field : raw_descriptors
  Target field : descriptors_cleaned
  Application process completed successfully

>>> # Query the database to check the results
>>> from techminer2.database.tools import Query
>>> Query(
...     query_expression="SELECT descriptors_cleaned FROM database LIMIT 5;",
...     root_directory="examples/fintech/",
...     database="main",
...     record_years_range=(None, None),
...     record_citations_range=(None, None),
... ).run()  
                                 descriptors_cleaned
0  AN_EFFECT; AN_INSTITUTIONAL_ASPECT; AN_MODERAT...
1  ACTOR_NETWORK_THEORY; ANT; AN_UNPRECEDENTED_LE...
2  AN_INITIAL_TECHNOLOGY_ADVANTAGE; CHINA; FINANC...
3  AGGREGATION; ANALYSIS; AN_ADVANTAGE; AN_EXTENS...
4  ACCESS; A_FORM; BEHAVIOURAL_ECONOMICS; DIGITAL...
>>> # Delete the created field from the database
>>> from techminer2.database.operators import DeleteOperator
>>> DeleteOperator(field="descriptors_cleaned", root_directory="examples/fintech/").run()