>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import terms_by_cluster_summary
>>> terms_by_cluster_summary(
... #
... # TERMS:
... # NOTE(review): the "NN:NNNN" suffixes on the terms in the output below
... # are presumably the counters kept by retain_counters=True -- confirm.
... field='descriptors',
... retain_counters=True,
... #
... # FILTER PARAMS:
... # NOTE(review): a (None, None) range presumably leaves both bounds open
... # and custom_terms=None presumably applies no explicit term list -- confirm.
... top_n=50,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # ESTIMATOR:
... # n_clusters=4 corresponds to the four rows (clusters 0-3) of the summary
... # printed below; random_state=0 pins the seed so the printed result can
... # be reproduced from run to run.
... sklearn_estimator=KMeans(
... n_clusters=4,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="lloyd",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... # NOTE(review): the (None, None) filters and sort_by=None presumably mean
... # "no year / citation restriction and no reordering" -- confirm.
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... sort_by=None,
... )
Cluster ... Terms
0 0 ... FINANCIAL_INDUSTRY 09:2006; BUSINESS_MODELS 04...
1 1 ... FINANCIAL_SERVICE 04:1036; COMMERCE 03:0846; C...
2 2 ... FINTECH 32:5393; FINANCIAL_TECHNOLOGY 18:2519;...
3 3 ... SUSTAINABLE_DEVELOPMENT 04:0306; ELSEVIER_LTD ...
[4 rows x 4 columns]