>>> from sklearn.decomposition import PCA
>>> from sklearn.cluster import KMeans
>>> from techminer2.factor_analysis.tfidf import cluster_centers_frame
>>> cluster_centers_frame(
... #
... # PARAMS:
... field="author_keywords",
... #
... # TF PARAMS:
... is_binary=True,
... cooc_within=1,
... #
... # TF-IDF PARAMS:
... norm=None,
... use_idf=False,
... smooth_idf=False,
... sublinear_tf=False,
... #
... # TERM PARAMS:
... top_n=20,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # DECOMPOSITION PARAMS:
... decomposition_estimator = PCA(
... n_components=5,
... whiten=False,
... svd_solver="auto",
... tol=0.0,
... iterated_power="auto",
... n_oversamples=10,
... power_iteration_normalizer="auto",
... random_state=0,
... ),
... #
... # CLUSTERING:
... clustering_estimator_or_dict = KMeans(
... n_clusters=6,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="elkan",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... )
dim 0 1 2 3 4
cluster
0 -0.201359 -0.145628 -0.537202 -0.307006 -0.472928
1 -0.237531 -0.943339 0.790967 0.032676 -0.003779
2 -0.203930 0.314080 -0.207809 0.815849 -0.031831
3 -0.254730 1.080850 0.568245 -0.299478 -0.064949
4 -0.474124 0.044653 -0.408102 -0.313095 0.858150
5 4.959197 -0.131331 -0.127054 -0.021353 0.127476