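Term Occurrence by Cluster
===============================================================================

Builds a table with one row per term of the selected ``field`` and one column
per cluster label produced by the supplied scikit-learn estimator (presumably
fitted on the documents of the database). Each cell counts the occurrences of
the term within that cluster, so every row adds up to the occurrence counter
shown first in the term label when ``retain_counters=True``; the second
counter appears to be the citation count. For example, ``FINTECH 32:5393``
below gives 1 + 21 + 0 + 1 + 0 + 2 + 5 + 2 = 32.
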
>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import term_occurrence_by_cluster
>>> term_occurrence_by_cluster(
...     #
...     # TERMS:
...     field='descriptors',
...     retain_counters=True,
...     #
...     # FILTER PARAMS:
...     top_n=50,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_terms=None,
...     #
...     # ESTIMATOR:
...     sklearn_estimator=KMeans(
...         n_clusters=8,
...         init="k-means++",
...         n_init=10,
...         max_iter=300,
...         tol=0.0001,
...         algorithm="lloyd",
...         random_state=0,
...     ),
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
...     sort_by=None,
... ).head(20)
cluster                              0   1  2  3  4  5  6  7
descriptors
FINTECH 32:5393                      1  21  0  1  0  2  5  2
FINANCIAL_TECHNOLOGY 18:2519         4   6  0  1  3  0  1  3
FINANCIAL_SERVICES 12:1929           1   3  1  1  3  0  2  1
FINANCE 11:1950                      4   0  1  0  0  2  2  2
FINANCIAL_INDUSTRY 09:2006           0   2  0  0  2  2  3  0
INNOVATION 08:0990                   0   2  0  0  2  0  4  0
FINTECH_STARTUPS 07:1793             0   3  1  0  0  0  3  0
FINANCIAL_SECTOR 07:1562             2   2  0  0  3  0  0  0
INFORMATION_TECHNOLOGY 07:1383       0   2  0  0  2  0  2  1
FINANCIAL_SERVICES_INDUSTRY 06:1370  0   2  1  1  2  0  0  0
FRANCIS_GROUP 05:1227                1   2  1  0  1  0  0  0
FINTECH_COMPANIES 05:1072            0   5  0  0  0  0  0  0
FINANCIAL_INNOVATION 05:0401         0   2  0  0  1  0  1  1
BUSINESS_MODELS 04:1441              0   0  1  1  0  0  2  0
FINANCIAL_SERVICE 04:1036            1   0  1  0  0  1  1  0
INFORMATION_SYSTEMS 04:0830          0   3  0  0  0  0  1  0
FINANCIAL_INSTITUTIONS 04:0722       1   2  0  1  0  0  0  0
FINANCIAL_SYSTEM 04:0688             0   1  0  1  2  0  0  0
ARTIFICIAL_INTELLIGENCE 04:0495      0   1  0  0  1  0  0  2
FINTECH_SERVICES 04:0468             0   1  0  0  2  0  0  1