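Theme to Documents Mapping

The example below passes a scikit-learn LatentDirichletAllocation estimator
with 10 components to theme_to_documents_mapping, using the "author_keywords"
field, and pretty-prints the returned dictionary. Each key is a theme index
and each value is a list of document references. Theme 4 is absent from the
output shown, so not every theme index necessarily appears as a key.
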
>>> from techminer2.topic_modeling import theme_to_documents_mapping
>>> from sklearn.decomposition import LatentDirichletAllocation
>>> mapping = theme_to_documents_mapping(
...     field="author_keywords",
...     #
...     # TF PARAMS:
...     is_binary=True,
...     cooc_within=2,
...     #
...     # TF-IDF PARAMS:
...     norm=None,
...     use_idf=False,
...     smooth_idf=False,
...     sublinear_tf=False,
...     #
...     # TOP TERMS:
...     n_top_terms=5,
...     #
...     # ITEM FILTERS:
...     top_n=None,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_terms=None,
...     #
...     # ESTIMATOR:
...     sklearn_estimator=LatentDirichletAllocation(
...         n_components=10,
...         learning_decay=0.7,
...         learning_offset=50.0,
...         max_iter=10,
...         batch_size=128,
...         evaluate_every=-1,
...         perp_tol=0.1,
...         mean_change_tol=0.001,
...         max_doc_update_iter=100,
...         random_state=0,
...     ),
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
... )
>>> import pprint
>>> pprint.pprint(mapping)
{0: ['Anagnostopoulos I., 2018, J ECON BUS, V100, P7',
     'Du W.D., 2019, J STRATEGIC INFORM SYST, V28, P50',
     'Gai K., 2017, LECT NOTES COMPUT SCI, V10135 LNCS, P236',
     'Gimpel H., 2018, ELECTRON MARK, V28, P245',
     'Haddad C., 2019, SMALL BUS ECON, V53, P81',
     'Jakšič M., 2019, RISK MANAGE, V21, P1',
     'Kang J., 2018, HUMCENTRIC COMPUT INF SCI, V8',
     'Puschmann T., 2017, BUSIN INFO SYS ENG, V59, P69',
     'Saksonova S., 2017, EUR RES STUD, V20, P961',
     'Schueffel P., 2016, J INNOV MANAG, V4, P32',
     'Zhao Q., 2019, SUSTAINABILITY, V11'],
 1: ['Jagtiani J., 2018, J ECON BUS, V100, P1',
     'Jagtiani J., 2018, J ECON BUS, V100, P43',
     'Li Y./2, 2017, FINANCIAL INNOV, V3',
     'Stewart H., 2018, INF COMPUT SECURITY, V26, P109'],
 2: ['Das S.R., 2019, FINANC MANAGE, V48, P981',
     'Gai K., 2018, J NETWORK COMPUT APPL, V103, P262',
     'Hu Z., 2019, SYMMETRY, V11',
     'Romanova I., 2016, CONTEMP STUD ECON FINANC ANAL, V98, P21'],
 3: ['Cai C.W., 2018, ACCOUNT FINANC, V58, P965',
     'Lee I., 2018, BUS HORIZ, V61, P35',
     'Shim Y., 2016, TELECOMMUN POLICY, V40, P168',
     'Zavolokina L., 2016, FINANCIAL INNOV, V2',
     'Zavolokina L., 2016, INT CONF INF SYST ICIS'],
 5: ['Kim Y., 2016, INT J APPL ENG RES, V11, P1058'],
 6: ['Gabor D., 2017, NEW POLIT ECON, V22, P423',
     'Ryu H.-S., 2018, IND MANAGE DATA SYS, V118, P541'],
 7: ['Gracia D.B., 2019, IND MANAGE DATA SYS, V119, P1411'],
 8: ['Anshari M., 2019, ENERGY PROCEDIA, V156, P234',
     'Buchak G., 2018, J FINANC ECON, V130, P453',
     'Deng X., 2019, SUSTAINABILITY, V11',
     'Dorfleitner G., 2017, FINTECH IN GER, P1',
     'Gomber P., 2017, J BUS ECON, V87, P537'],
 9: ['Chen L., 2016, CHINA ECON J, V9, P225',
     'Iman N., 2018, ELECT COMMER RES APPL, V30, P72',
     'Jagtiani J., 2019, FINANC MANAGE, V48, P1009',
     'Leong C., 2017, INT J INF MANAGE, V37, P92',
     'Wonglimpiyarat J., 2017, FORESIGHT, V19, P590']}