Co-occurrence Matrix

>>> from techminer2.tech_mining.co_occurrence import co_occurrence_matrix
>>> matrix = co_occurrence_matrix(
...     #
...     # FUNCTION PARAMS:
...     columns='author_keywords',
...     rows='authors',
...     #
...     # COLUMN PARAMS:
...     col_top_n=None,
...     col_occ_range=(2, None),
...     col_gc_range=(None, None),
...     col_custom_items=None,
...     #
...     # ROW PARAMS:
...     row_top_n=None,
...     row_occ_range=(2, None),
...     row_gc_range=(None, None),
...     row_custom_items=None,
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
... )
>>> matrix.df_
author_keywords       FINTECH 31:5168  ...  P2P_LENDING 02:0161
authors                                ...
Jagtiani J. 3:0317                  3  ...                    2
Gomber P. 2:1065                    1  ...                    0
Hornuf L. 2:0358                    2  ...                    0
Gai K. 2:0323                       2  ...                    0
Qiu M. 2:0323                       2  ...                    0
Sun X./3 2:0323                     2  ...                    0
Lemieux C. 2:0253                   2  ...                    1
Dolata M. 2:0181                    2  ...                    0
Schwabe G. 2:0181                   2  ...                    0
Zavolokina L. 2:0181                2  ...                    0

[10 rows x 12 columns]
>>> matrix.heat_map_ 
<pandas.io.formats.style.Styler object ...
>>> matrix.list_cells_.head()
               row              column  matrix_value
0  FINTECH 31:5168  Jagtiani J. 3:0317             3
1  FINTECH 31:5168    Gomber P. 2:1065             1
2  FINTECH 31:5168    Hornuf L. 2:0358             2
3  FINTECH 31:5168       Gai K. 2:0323             2
4  FINTECH 31:5168       Qiu M. 2:0323             2
>>> print(matrix.prompt_) 
Your task is ...
>>> matrix = co_occurrence_matrix(
...     #
...     # FUNCTION PARAMS:
...     columns='author_keywords',
...     rows=None,
...     #
...     # COLUMN PARAMS:
...     col_top_n=10,
...     col_occ_range=(None, None),
...     col_gc_range=(None, None),
...     col_custom_items=None,
...     #
...     # ROW PARAMS:
...     row_top_n=None,
...     row_occ_range=(2, None),
...     row_gc_range=(None, None),
...     row_custom_items=None,
...     #
...     # DATABASE PARAMS:
...     root_dir="example/",
...     database="main",
...     year_filter=(None, None),
...     cited_by_filter=(None, None),
... )
>>> matrix.df_
author_keywords               FINTECH 31:5168  ...  CASE_STUDY 02:0340
author_keywords                                ...
FINTECH 31:5168                            31  ...                   2
INNOVATION 07:0911                          5  ...                   0
FINANCIAL_SERVICES 04:0667                  3  ...                   0
FINANCIAL_INCLUSION 03:0590                 3  ...                   1
FINANCIAL_TECHNOLOGY 03:0461                2  ...                   0
CROWDFUNDING 03:0335                        2  ...                   0
MARKETPLACE_LENDING 03:0317                 3  ...                   0
BUSINESS_MODELS 02:0759                     2  ...                   0
CYBER_SECURITY 02:0342                      2  ...                   0
CASE_STUDY 02:0340                          2  ...                   2

[10 rows x 10 columns]
>>> matrix.list_cells_.head()
               row                        column  matrix_value
0  FINTECH 31:5168               FINTECH 31:5168            31
1  FINTECH 31:5168            INNOVATION 07:0911             5
2  FINTECH 31:5168    FINANCIAL_SERVICES 04:0667             3
3  FINTECH 31:5168   FINANCIAL_INCLUSION 03:0590             3
4  FINTECH 31:5168  FINANCIAL_TECHNOLOGY 03:0461             2
>>> print(matrix.prompt_) 
Your task is ...