>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import term_occurrence_by_cluster
>>> term_occurrence_by_cluster(
... #
... # TERMS:
... field='descriptors',
... retain_counters=True,
... #
... # FILTER PARAMS:
... top_n=50,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # ESTIMATOR:
... sklearn_estimator=KMeans(
... n_clusters=8,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="lloyd",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... sort_by=None,
... ).head(20)
cluster 0 1 2 3 4 5 6 7
descriptors
FINTECH 32:5393 3 21 0 1 0 2 5 0
FINANCIAL_TECHNOLOGY 18:2519 5 6 1 1 4 0 1 0
FINANCIAL_SERVICES 12:1929 0 3 1 1 4 0 2 1
FINANCE 11:1950 5 0 1 0 0 2 2 1
FINANCIAL_INDUSTRY 09:2006 0 2 0 0 2 2 3 0
FINTECH_STARTUPS 08:1913 0 3 1 0 0 0 3 1
INNOVATION 08:0990 0 2 0 0 2 0 4 0
FINANCIAL_SECTOR 07:1562 2 2 0 0 3 0 0 0
INFORMATION_TECHNOLOGY 07:1383 1 2 0 0 2 0 2 0
FINANCIAL_SERVICES_INDUSTRY 06:1370 0 2 0 1 2 0 0 1
FRANCIS_GROUP 05:1227 0 2 1 0 1 0 0 1
FINTECH_COMPANIES 05:1072 0 5 0 0 0 0 0 0
FINANCIAL_INNOVATION 05:0401 0 2 0 0 2 0 1 0
BUSINESS_MODELS 04:1441 0 0 0 1 0 0 2 1
FINANCIAL_SERVICE 04:1036 0 0 1 0 0 1 1 1
INFORMATION_SYSTEMS 04:0830 0 3 0 0 0 0 1 0
FINANCIAL_INSTITUTIONS 04:0722 1 2 0 1 0 0 0 0
FINANCIAL_SYSTEM 04:0688 0 1 0 1 2 0 0 0
ARTIFICIAL_INTELLIGENCE 04:0495 1 1 0 0 2 0 0 0
FINTECH_SERVICES 04:0468 1 1 0 0 2 0 0 0