Terms by Cluster Frame

>>> from techminer2.topic_modeling import terms_by_cluster_frame
>>> from sklearn.decomposition import LatentDirichletAllocation
>>> terms_by_cluster_frame(
...     field="author_keywords",
...     #
...     # TF PARAMS:
...     is_binary=True,
...     cooc_within=3,
...     #
...     # TF-IDF PARAMS:
...     norm=None,
...     use_idf=False,
...     smooth_idf=False,
...     sublinear_tf=False,
...     #
...     # ITEM FILTERS:
...     top_n=None,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_terms=None,
...     #
...     # ESTIMATOR:
...     sklearn_estimator=LatentDirichletAllocation(
...         n_components=10,
...         learning_decay=0.7,
...         learning_offset=50.0,
...         max_iter=10,
...         batch_size=128,
...         evaluate_every=-1,
...         perp_tol=0.1,
...         mean_change_tol=0.001,
...         max_doc_update_iter=100,
...         random_state=0,
...     ),
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
... ).head()
cluster                                    0  ...                            9
term                                          ...
0                            FINTECH 31:5168  ...              FINTECH 31:5168
1                 FINANCIAL_SERVICES 04:0667  ...           INNOVATION 07:0911
2               FINANCIAL_TECHNOLOGY 03:0461  ...  FINANCIAL_INCLUSION 03:0590
3                         INNOVATION 07:0911  ...       MOBILE_PAYMENT 02:0184
4        SERVICE_INNOVATION_STRATEGY 01:0079  ...           CASE_STUDY 02:0340

[5 rows x 10 columns]