Terms by Cluster DataFrame

>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import terms_by_cluster_frame
>>> terms_by_cluster_frame(
...     #
...     # TERMS:
...     field='descriptors',
...     retain_counters=True,
...     #
...     # FILTER PARAMS:
...     top_n=50,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_terms=None,
...     #
...     # ESTIMATOR:
...     sklearn_estimator=KMeans(
...         n_clusters=4,
...         init="k-means++",
...         n_init=10,
...         max_iter=300,
...         tol=0.0001,
...         algorithm="lloyd",
...         random_state=0,
...     ),
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
...     sort_by=None,
... ).head(10)
                                     0  ...                               3
0                   INNOVATION 08:0990  ...                 FINTECH 32:5393
1  FINANCIAL_SERVICES_INDUSTRY 06:1370  ...    FINANCIAL_TECHNOLOGY 18:2519
2              BUSINESS_MODELS 04:1441  ...      FINANCIAL_SERVICES 12:1929
3          INFORMATION_SYSTEMS 04:0830  ...                 FINANCE 11:1950
4                   BLOCKCHAIN 03:0881  ...      FINANCIAL_INDUSTRY 09:2006
5           FINTECH_REVOLUTION 03:0731  ...        FINTECH_STARTUPS 08:1913
6                      BANKING 03:0370  ...        FINANCIAL_SECTOR 07:1562
7                   STUDY_AIMS 03:0283  ...  INFORMATION_TECHNOLOGY 07:1383
8            ACADEMIC_RESEARCH 02:0691  ...           FRANCIS_GROUP 05:1227
9                CURRENT_STATE 02:0691  ...       FINTECH_COMPANIES 05:1072

[10 rows x 4 columns]