Terms by Cluster Frame¶

Example

>>> from techminer2.experimental.co_occurrence import TermsByClusterDataFrame
>>> from techminer2.thesaurus.descriptors import ApplyThesaurus, InitializeThesaurus
>>> # Restore the column values to their initial values
>>> InitializeThesaurus(root_directory="examples/fintech/", quiet=True).run()
>>> ApplyThesaurus(root_directory="examples/fintech/", quiet=True).run()
>>> # Generate terms by cluster data frame
>>> df = (
...     TermsByClusterDataFrame()
...     #
...     # FIELD:
...     .having_terms_in_top(20)
...     .having_terms_ordered_by("OCC")
...     .having_term_occurrences_between(None, None)
...     .having_term_citations_between(None, None)
...     .having_terms_in(None)
...     #
...     # NETWORK:
...     .using_clustering_algorithm_or_dict("louvain")
...     .using_association_index("association")
...     .using_minimum_terms_in_cluster(5)
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
...     .where_records_match(None)
...     #
...     .run()
... )
>>> # Display the resulting data frame
>>> print(df.to_string()) 
                                0                               1                                        2                   3
0                 FINTECH 38:6131            TECHNOLOGIES 15:1633                  THE_DEVELOPMENT 09:1293       BANKS 08:1049
1  THE_FINANCIAL_INDUSTRY 09:2006  FINANCIAL_TECHNOLOGIES 12:1615                       INNOVATION 08:1816        DATA 07:1086
2            PRACTITIONER 06:1194                 FINANCE 10:1188  THE_FINANCIAL_SERVICES_INDUSTRY 06:1237   CONSUMERS 07:0925
3    THE_FINANCIAL_SECTOR 05:1147              REGULATORS 08:0974               FINANCIAL_SERVICES 06:1116  THE_IMPACT 06:0908
4  INFORMATION_TECHNOLOGY 05:1101                   CHINA 06:0673                         SERVICES 06:1089
5       FINTECH_COMPANIES 05:1072