Cluster to Terms Mapping

## >>> from sklearn.decomposition import PCA
## >>> pca = PCA(
## ...     n_components=5,
## ...     whiten=False,
## ...     svd_solver="auto",
## ...     tol=0.0,
## ...     iterated_power="auto",
## ...     n_oversamples=10,
## ...     power_iteration_normalizer="auto",
## ...     random_state=0,
## ... )
## >>> from sklearn.cluster import KMeans
## >>> kmeans = KMeans(
## ...     n_clusters=6,
## ...     init="k-means++",
## ...     n_init=10,
## ...     max_iter=300,
## ...     tol=0.0001,
## ...     algorithm="elkan",
## ...     random_state=0,
## ... )
## >>> from techminer2.packages.factor_analysis.tfidf import cluster_to_terms_mapping
## >>> mapping = (
## ...     ClusterToTermsMapping()
## ...     #
## ...     # FIELD:
## ...     .with_field("descriptors")
## ...     .having_terms_in_top(50)
## ...     .having_terms_ordered_by("OCC")
## ...     .having_term_occurrences_between(None, None)
## ...     .having_term_citations_between(None, None)
## ...     .having_terms_in(None)
## ...     #
## ...     # DECOMPOSITION:
## ...     .using_decomposition_estimator(pca)
## ...     #
## ...     # CLUSTERING:
## ...     .using_clustering_estimator_or_dict(kmeans)
## ...     #
## ...     # TFIDF:
## ...     .using_binary_term_frequencies(False)
## ...     .using_row_normalization(None)
## ...     .using_idf_reweighting(False)
## ...     .using_idf_weights_smoothing(False)
## ...     .using_sublinear_tf_scaling(False)
## ...     #
## ...     # DATABASE:
## ...     .where_root_directory_is("examples/fintech/")
## ...     .where_database_is("main")
## ...     .where_record_years_range_is(None, None)
## ...     .where_record_citations_range_is(None, None)
## ...     .where_records_match(None)
## ...     #
## ...     .run()
## ... )
## >>> from pprint import pprint
## >>> pprint(mapping)