Clusters to Terms MappingΒΆ

>>> from sklearn.cluster import KMeans
>>> kmeans = KMeans(
...     n_clusters=4,
...     init="k-means++",
...     n_init=10,
...     max_iter=300,
...     tol=0.0001,
...     algorithm="lloyd",
...     random_state=0,
... )
>>> from techminer2.packages.document_clustering import ClustersToTermsMapping
>>> mapping = (
...     ClustersToTermsMapping()
...     #
...     # FIELD:
...     .with_field("raw_keywords")
...     .having_terms_in_top(50)
...     .having_terms_ordered_by("OCC")
...     .having_term_occurrences_between(None, None)
...     .having_term_citations_between(None, None)
...     .having_terms_in(None)
...     #
...     # COUNTERS:
...     .using_term_counters(True)
...     #
...     # TFIDF:
...     .using_binary_term_frequencies(False)
...     .using_row_normalization(None)
...     .using_idf_reweighting(False)
...     .using_idf_weights_smoothing(False)
...     .using_sublinear_tf_scaling(False)
...     #
...     # CLUSTERING:
...     .using_clustering_algorithm_or_dict(kmeans)
...     #
...     # DATABASE:
...     .where_root_directory_is("example/")
...     .where_database_is("main")
...     .where_record_years_range_is(None, None)
...     .where_record_citations_range_is(None, None)
...     .where_records_match(None)
...     #
...     .run()
... )
>>> import pprint
>>> pprint.pprint(mapping)
{0: ['SUSTAINABILITY 03:0227',
     'SUSTAINABLE_DEVELOPMENT 03:0227',
     'LITERATURE_REVIEW 02:0560',
     'FINANCIAL_SYSTEM 02:0385',
     'DIGITIZATION 02:0319',
     'CHINA 02:0150'],
 1: ['FINTECH 32:5393',
     'FINANCE 11:1950',
     'INNOVATION 08:0990',
     'FINANCIAL_SERVICES 05:0746',
     'FINANCIAL_SERVICE 04:1036',
     'BUSINESS_MODELS 03:1335',
     'BLOCKCHAIN 03:0881',
     'COMMERCE 03:0846',
     'FINANCIAL_INCLUSION 03:0590',
     'FINANCIAL_INSTITUTION 03:0488',
     'SURVEYS 03:0484',
     'FINANCIAL_TECHNOLOGY 03:0461',
     'BANKING 03:0370',
     'CROWDFUNDING 03:0335',
     'ELECTRONIC_MONEY 03:0305',
     'FINANCIAL_SERVICES_INDUSTRIES 02:0696',
     'DIGITAL_TECHNOLOGIES 02:0494',
     'PERCEIVED_USEFULNESS 02:0346',
     'CYBER_SECURITY 02:0342',
     'CASE_STUDY 02:0340',
     'DESIGN_METHODOLOGY_APPROACH 02:0329',
     'SALES 02:0329',
     'ARTIFICIAL_INTELLIGENCE 02:0327',
     'FINANCIAL_INDUSTRY 02:0323',
     'SECURITY_AND_PRIVACY 02:0323',
     'TECHNOLOGY 02:0310',
     'ROBOTS 02:0289',
     'TECHNOLOGY_ACCEPTANCE_MODEL 02:0280',
     'REGTECH 02:0266',
     'DEVELOPING_COUNTRIES 02:0248',
     'INFORMATION_SYSTEMS 02:0235',
     'THEORETICAL_FRAMEWORK 02:0206',
     'MOBILE_TELECOMMUNICATION_SYSTEMS 02:0186',
     'GLOBAL_SYSTEM_FOR_MOBILE_COMMUNICATIONS 02:0184',
     'MOBILE_PAYMENT 02:0184',
     'P2P_LENDING 02:0161',
     'CUSTOMER_EXPERIENCE 01:0576'],
 2: ['MARKETPLACE_LENDING 03:0317',
     'LENDINGCLUB 02:0253',
     'PEER_TO_PEER_LENDING 02:0253',
     'SHADOW_BANKING 02:0253'],
 3: ['CONTENT_ANALYSIS 02:0181',
     'DIGITALIZATION 02:0181',
     'POPULAR_PRESS 02:0181']}