>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import terms_by_cluster_summary
>>> terms_by_cluster_summary(
... # Example call: the printed result below has one row per cluster
... # (clusters 0-3, matching n_clusters=4 in the estimator).
... #
... # TERMS: the 'descriptors' field is used. The terms in the output below
... # carry "NN:NNNN" counters -- presumably because retain_counters=True
... # (confirm against the function's documentation).
... field='descriptors',
... retain_counters=True,
... #
... # FILTER PARAMS: top_n is 50; both range filters are left as
... # (None, None) and no custom term list is supplied.
... top_n=50,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # ESTIMATOR: a scikit-learn KMeans instance with 4 clusters.
... # random_state=0 fixes the seed so the clustering is repeatable.
... sklearn_estimator=KMeans(
... n_clusters=4,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="lloyd",
... random_state=0,
... ),
... #
... # DATABASE PARAMS: root_dir is "example/" and database is "main";
... # the year and cited-by filters are left as (None, None) and
... # sort_by is None.
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... sort_by=None,
... )
Cluster ... Terms
0 0 ... INNOVATION 08:0990; FINANCIAL_SERVICES_INDUSTR...
1 1 ... FINANCIAL_SERVICE 04:1036; NEW_TECHNOLOGIES 02...
2 2 ... DISRUPTIVE_INNOVATION 02:0759
3 3 ... FINTECH 32:5393; FINANCIAL_TECHNOLOGY 18:2519;...
<BLANKLINE>
[4 rows x 4 columns]