Terms by Cluster DataframeΒΆ

Example

>>> from sklearn.cluster import KMeans
>>> kmeans = KMeans(
...     n_clusters=4,
...     init="k-means++",
...     n_init=10,
...     max_iter=300,
...     tol=0.0001,
...     algorithm="lloyd",
...     random_state=0,
... )
>>> from techminer2.packages.document_clustering import TermsByClusterDataFrame
>>> (
...     TermsByClusterDataFrame()
...     #
...     # FIELD:
...     .with_field("raw_keywords")
...     .having_terms_in_top(50)
...     .having_terms_ordered_by("OCC")
...     .having_term_occurrences_between(None, None)
...     .having_term_citations_between(None, None)
...     .having_terms_in(None)
...     #
...     # COUNTERS:
...     .using_term_counters(True)
...     #
...     # TFIDF:
...     .using_binary_term_frequencies(False)
...     .using_row_normalization(None)
...     .using_idf_reweighting(False)
...     .using_idf_weights_smoothing(False)
...     .using_sublinear_tf_scaling(False)
...     #
...     # CLUSTERING:
...     .using_clustering_algorithm_or_dict(kmeans)
...     #
...     # DATABASE:
...     .where_root_directory_is("examples/fintech/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
...     .where_records_match(None)
...     #
...     .run()
... ).head(10)
                               0  ...                             3
0     FINANCIAL_SERVICES 05:0746  ...               FINTECH 32:5393
1        BUSINESS_MODELS 03:1335  ...            BLOCKCHAIN 03:0881
2  FINANCIAL_INSTITUTION 03:0488  ...   FINANCIAL_INCLUSION 03:0590
3   FINANCIAL_TECHNOLOGY 03:0461  ...          CROWDFUNDING 03:0335
4                BANKING 03:0370  ...   MARKETPLACE_LENDING 03:0317
5             TECHNOLOGY 02:0310  ...      ELECTRONIC_MONEY 03:0305
6                REGTECH 02:0266  ...           LENDINGCLUB 02:0253
7                  CHINA 02:0150  ...  PEER_TO_PEER_LENDING 02:0253
8                                 ...        SHADOW_BANKING 02:0253
9                                 ...           P2P_LENDING 02:0161

[10 rows x 4 columns]