Cosine Similarities

>>> from techminer2.tech_mining.svd.tfidf_matrix import cosine_similarities
>>> cosine_similarities(
...     #
...     # PARAMS:
...     field="nlp_phrases",
...     #
...     # TF PARAMS:
...     is_binary=True,
...     cooc_within=1,
...     #
...     # TF-IDF PARAMS:
...     norm=None,
...     use_idf=False,
...     smooth_idf=False,
...     sublinear_tf=False,
...     #
...     # ITEM PARAMS:
...     top_n=20,
...     occ_range=(None, None),
...     gc_range=(None, None),
...     custom_items=None,
...     #
...     # SVD PARAMS:
...     n_components=5,
...     algorithm="randomized",
...     n_iter=5,
...     n_oversamples=10,
...     power_iteration_normalizer="auto",
...     random_state=0,
...     tol=0.0,
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
... ).head()
                                                            cosine_similariries
nlp_phrases
FINTECH 40:6331               IMPACT 09:1495 (0.865); CHALLENGES 09:1473 (0....
TECHNOLOGIES 23:3317          FINANCIAL_SECTOR 09:1733 (0.805); CHALLENGES 0...
AUTHOR 18:2443                DATA 10:1569 (0.983); RESULTS 11:1810 (0.822);...
FINANCIAL_INDUSTRY 17:3704    INNOVATION 13:2298 (0.933); FINANCIAL_SECTOR 0...
FINANCIAL_TECHNOLOGY 17:2225  IMPACT 09:1495 (0.851); CHALLENGES 09:1473 (0....