>>> from sklearn.decomposition import PCA
>>> from sklearn.cluster import KMeans
>>> from techminer2.factor_analysis.tfidf import terms_to_cluster_mapping
>>> mapping = terms_to_cluster_mapping(
... #
... # PARAMS:
... field="author_keywords",
... #
... # TF PARAMS:
... is_binary=True,
... cooc_within=1,
... #
... # TF-IDF PARAMS:
... norm=None,
... use_idf=False,
... smooth_idf=False,
... sublinear_tf=False,
... #
... # TERM PARAMS:
... top_n=20,
... occ_range=(None, None),
... gc_range=(None, None),
... custom_terms=None,
... #
... # DECOMPOSITION PARAMS:
... decomposition_estimator = PCA(
... n_components=5,
... whiten=False,
... svd_solver="auto",
... tol=0.0,
... iterated_power="auto",
... n_oversamples=10,
... power_iteration_normalizer="auto",
... random_state=0,
... ),
... #
... # CLUSTERING PARAMS:
... clustering_estimator_or_dict = KMeans(
... n_clusters=6,
... init="k-means++",
... n_init=10,
... max_iter=300,
... tol=0.0001,
... algorithm="elkan",
... random_state=0,
... ),
... #
... # DATABASE PARAMS:
... root_dir="example/",
... database="main",
... year_filter=(None, None),
... cited_by_filter=(None, None),
... )
>>> from pprint import pprint
>>> pprint(mapping)
{'ARTIFICIAL_INTELLIGENCE 02:0327': 4,
'BANKING 02:0291': 3,
'BLOCKCHAIN 02:0305': 0,
'BUSINESS_MODELS 02:0759': 2,
'CASE_STUDY 02:0340': 0,
'CROWDFUNDING 03:0335': 0,
'CYBER_SECURITY 02:0342': 0,
'FINANCE 02:0309': 4,
'FINANCIAL_INCLUSION 03:0590': 0,
'FINANCIAL_SERVICES 04:0667': 2,
'FINANCIAL_TECHNOLOGY 03:0461': 2,
'FINTECH 31:5168': 5,
'INNOVATION 07:0911': 3,
'LENDINGCLUB 02:0253': 1,
'MARKETPLACE_LENDING 03:0317': 1,
'PEER_TO_PEER_LENDING 02:0253': 1,
'REGTECH 02:0266': 2,
'ROBOTS 02:0289': 4,
'SHADOW_BANKING 02:0253': 1,
'TECHNOLOGY 02:0310': 3}