Term Occurrence by Cluster

>>> from sklearn.cluster import KMeans
>>> from techminer2.document_clustering import term_occurrence_by_cluster
>>> term_occurrence_by_cluster(
...     #
...     # TERMS:
...     field='descriptors',
...     retain_counters=True,
...     #
...     # FILTER PARAMS:
...     top_n=50,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_terms=None,
...     #
...     # ESTIMATOR:
...     sklearn_estimator=KMeans(
...         n_clusters=8,
...         init="k-means++",
...         n_init=10,
...         max_iter=300,
...         tol=0.0001,
...         algorithm="lloyd",
...         random_state=0,
...     ),
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
...     sort_by=None,
... ).head(20)
cluster                              0   1  2  3  4  5  6  7
descriptors
FINTECH 32:5393                      3  21  0  1  0  2  5  0
FINANCIAL_TECHNOLOGY 18:2519         5   6  1  1  4  0  1  0
FINANCIAL_SERVICES 12:1929           0   3  1  1  4  0  2  1
FINANCE 11:1950                      5   0  1  0  0  2  2  1
FINANCIAL_INDUSTRY 09:2006           0   2  0  0  2  2  3  0
FINTECH_STARTUPS 08:1913             0   3  1  0  0  0  3  1
INNOVATION 08:0990                   0   2  0  0  2  0  4  0
FINANCIAL_SECTOR 07:1562             2   2  0  0  3  0  0  0
INFORMATION_TECHNOLOGY 07:1383       1   2  0  0  2  0  2  0
FINANCIAL_SERVICES_INDUSTRY 06:1370  0   2  0  1  2  0  0  1
FRANCIS_GROUP 05:1227                0   2  1  0  1  0  0  1
FINTECH_COMPANIES 05:1072            0   5  0  0  0  0  0  0
FINANCIAL_INNOVATION 05:0401         0   2  0  0  2  0  1  0
BUSINESS_MODELS 04:1441              0   0  0  1  0  0  2  1
FINANCIAL_SERVICE 04:1036            0   0  1  0  0  1  1  1
INFORMATION_SYSTEMS 04:0830          0   3  0  0  0  0  1  0
FINANCIAL_INSTITUTIONS 04:0722       1   2  0  1  0  0  0  0
FINANCIAL_SYSTEM 04:0688             0   1  0  1  2  0  0  0
ARTIFICIAL_INTELLIGENCE 04:0495      1   1  0  0  2  0  0  0
FINTECH_SERVICES 04:0468             1   1  0  0  2  0  0  0