>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import terms_by_cluster_frame
>>> terms_by_cluster_frame(
... #
... # TERMS:
... field='descriptors',
... retain_counters=True,
... #
... # FILTER PARAMS:
... top_n=50,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # ESTIMATOR:
... sklearn_estimator=KMeans(
... n_clusters=4,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="lloyd",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... sort_by=None,
... ).head(10)
0 ... 3
0 INNOVATION 08:0990 ... FINTECH 32:5393
1 FINANCIAL_SERVICES_INDUSTRY 06:1370 ... FINANCIAL_TECHNOLOGY 18:2519
2 BUSINESS_MODELS 04:1441 ... FINANCIAL_SERVICES 12:1929
3 INFORMATION_SYSTEMS 04:0830 ... FINANCE 11:1950
4 BLOCKCHAIN 03:0881 ... FINANCIAL_INDUSTRY 09:2006
5 FINTECH_REVOLUTION 03:0731 ... FINTECH_STARTUPS 08:1913
6 BANKING 03:0370 ... FINANCIAL_SECTOR 07:1562
7 STUDY_AIMS 03:0283 ... INFORMATION_TECHNOLOGY 07:1383
8 ACADEMIC_RESEARCH 02:0691 ... FRANCIS_GROUP 05:1227
9 CURRENT_STATE 02:0691 ... FINTECH_COMPANIES 05:1072
[10 rows x 4 columns]