>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import terms_by_cluster_frame
>>> terms_by_cluster_frame(
... #
... # TERMS:
... field='descriptors',
... retain_counters=True,
... #
... # FILTER PARAMS:
... top_n=50,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # ESTIMATOR:
... sklearn_estimator=KMeans(
... n_clusters=4,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="lloyd",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... sort_by=None,
... ).head(10)
0 ... 3
0 FINANCIAL_INDUSTRY 09:2006 ... SUSTAINABLE_DEVELOPMENT 04:0306
1 BUSINESS_MODELS 04:1441 ... ELSEVIER_LTD 03:0474
2 INFORMATION_SYSTEMS 04:0830 ... SUSTAINABILITY 03:0227
3 SURVEYS 03:0484 ...
4 CROWDFUNDING 03:0335 ...
5 STUDY_AIMS 03:0283 ...
6 NEW_TECHNOLOGIES 02:0773 ...
7 DISRUPTIVE_INNOVATION 02:0759 ...
8 ACADEMIC_RESEARCH 02:0691 ...
9 CURRENT_STATE 02:0691 ...
[10 rows x 4 columns]