sklearn.tree.DecisionTreeRegressor
Last modified: 2023-03-11 | YouTube
Decision tree for regression.
[1]:
from sklearn.datasets import load_diabetes
X, y = load_diabetes(return_X_y=True)
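The diabetes dataset provides 442 samples with 10 standardized features. A quick shape check (a small sketch, not part of the original notebook):
# Sketch: confirm the dimensions of the feature matrix and the target vector.
X.shape, y.shape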
[2]:
from sklearn.tree import DecisionTreeRegressor
decisionTreeRegressor = DecisionTreeRegressor(
# --------------------------------------------------------------------------
# The function to measure the quality of a split.
# * 'squared_error': mean squared error, which is equal to variance
#   reduction as feature selection criterion and minimizes the L2 loss
#   using the mean of each terminal node.
# * 'friedman_mse': mean squared error with Friedman's improvement score
#   for potential splits.
# * 'absolute_error': mean absolute error, which minimizes the L1 loss
#   using the median of each terminal node.
# * 'poisson': reduction in Poisson deviance to find splits.
criterion="squared_error",
# --------------------------------------------------------------------------
#
# The remaining parameters are discussed in DecisionTreeClassifier.
#
# --------------------------------------------------------------------------
# The strategy used to choose the split at each node. Supported strategies
# are “best” to choose the best split and “random” to choose the best
# random split.
splitter="best",
# --------------------------------------------------------------------------
# The maximum depth of the tree. If None, then nodes are expanded until all
# leaves are pure or until all leaves contain less than min_samples_split
# samples.
max_depth=None,
# --------------------------------------------------------------------------
# The minimum number of samples required to split an internal node:
# * If int, then consider min_samples_split as the minimum number.
# * If float, then min_samples_split is a fraction and
# ceil(min_samples_split * n_samples) are the minimum number of samples
# for each split.
min_samples_split=2,
# --------------------------------------------------------------------------
# The minimum number of samples required to be at a leaf node. A split
# point at any depth will only be considered if it leaves at least
# min_samples_leaf training samples in each of the left and right branches.
# This may have the effect of smoothing the model, especially in
# regression.
# * If int, then consider min_samples_leaf as the minimum number.
# * If float, then min_samples_leaf is a fraction and
# ceil(min_samples_leaf * n_samples) are the minimum number of samples
# for each node.
min_samples_leaf=1,
# --------------------------------------------------------------------------
# The minimum weighted fraction of the sum total of weights (of all the
# input samples) required to be at a leaf node. Samples have equal weight
# when sample_weight is not provided.
min_weight_fraction_leaf=0.0,
# --------------------------------------------------------------------------
# The number of features to consider when looking for the best split:
# * If int, then consider max_features features at each split.
# * If float, then max_features is a fraction and
# max(1, int(max_features * n_features_in_)) features are considered at
# each split.
# * If “sqrt”, then max_features=sqrt(n_features).
# * If “log2”, then max_features=log2(n_features).
# * If None, then max_features=n_features.
max_features=None,
# --------------------------------------------------------------------------
# Controls the randomness of the estimator. The features are always
# randomly permuted at each split, even if splitter is set to "best". When
# max_features < n_features, the algorithm will select max_features at
# random at each split before finding the best split among them. But the
# best found split may vary across different runs, even if
# max_features=n_features. That is the case, if the improvement of the
# criterion is identical for several splits and one split has to be
# selected at random. To obtain a deterministic behaviour during fitting,
# random_state has to be fixed to an integer.
random_state=None,
# --------------------------------------------------------------------------
# Grow a tree with max_leaf_nodes in best-first fashion. Best nodes are
# defined as relative reduction in impurity. If None then unlimited number
# of leaf nodes.
max_leaf_nodes=None,
# --------------------------------------------------------------------------
# A node will be split if this split induces a decrease of the impurity
# greater than or equal to this value.
#
# The weighted impurity decrease equation is the following:
#
# N_t / N * (impurity - N_t_R / N_t * right_impurity
# - N_t_L / N_t * left_impurity)
#
# where N is the total number of samples, N_t is the number of samples at
# the current node, N_t_L is the number of samples in the left child, and
# N_t_R is the number of samples in the right child.
#
# N, N_t, N_t_R and N_t_L all refer to the weighted sum, if sample_weight
# is passed.
min_impurity_decrease=0.0,
# --------------------------------------------------------------------------
# Complexity parameter used for Minimal Cost-Complexity Pruning. The
# subtree with the largest cost complexity that is smaller than ccp_alpha
# will be chosen. By default, no pruning is performed.
ccp_alpha=0.0,
)
decisionTreeRegressor.fit(X, y)
decisionTreeRegressor.score(X, y)
[2]:
1.0
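The score method returns the coefficient of determination (R²) on the data passed to it; because the tree was grown without any depth or pruning constraints, it fits the training set perfectly and reports 1.0. A minimal sketch of estimating out-of-sample performance instead, using an illustrative max_depth that is not part of the original notebook:
from sklearn.model_selection import cross_val_score

# Sketch: 5-fold cross-validated R^2 for a depth-limited tree. The 1.0 above is
# the training R^2 of the unpruned tree, not a generalization estimate.
cross_val_score(
    DecisionTreeRegressor(max_depth=3, random_state=0),
    X,
    y,
    cv=5,
    scoring="r2",
).mean()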
[3]:
decisionTreeRegressor.predict(X)
[3]:
array([151., 75., 141., 206., 135., 97., 138., 63., 110., 310., 101.,
69., 179., 185., 118., 171., 166., 144., 97., 168., 68., 49.,
68., 245., 184., 202., 137., 85., 131., 283., 129., 59., 341.,
87., 65., 102., 265., 276., 252., 90., 100., 55., 61., 92.,
259., 53., 190., 142., 75., 142., 155., 225., 59., 104., 182.,
128., 52., 37., 170., 170., 61., 144., 52., 128., 71., 163.,
150., 97., 160., 178., 48., 270., 202., 111., 85., 42., 170.,
200., 252., 113., 143., 51., 52., 210., 65., 141., 55., 134.,
42., 111., 98., 164., 48., 96., 90., 162., 150., 279., 92.,
83., 128., 102., 302., 198., 95., 53., 134., 144., 232., 81.,
104., 59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
173., 180., 84., 121., 161., 99., 109., 115., 268., 274., 158.,
107., 83., 103., 272., 85., 280., 336., 281., 118., 317., 235.,
60., 174., 259., 178., 128., 96., 126., 288., 88., 292., 71.,
197., 186., 25., 84., 96., 195., 53., 217., 172., 131., 214.,
59., 70., 220., 268., 152., 47., 74., 295., 101., 151., 127.,
237., 225., 81., 151., 107., 64., 138., 185., 265., 101., 137.,
143., 141., 79., 292., 178., 91., 116., 86., 122., 72., 129.,
142., 90., 158., 39., 196., 222., 277., 99., 196., 202., 155.,
77., 191., 70., 73., 49., 65., 263., 248., 296., 214., 185.,
78., 93., 252., 150., 77., 208., 77., 108., 160., 53., 220.,
154., 259., 90., 246., 124., 67., 72., 257., 262., 275., 177.,
71., 47., 187., 125., 78., 51., 258., 215., 303., 243., 91.,
150., 310., 153., 346., 63., 89., 50., 39., 103., 308., 116.,
145., 74., 45., 115., 264., 87., 202., 127., 182., 241., 66.,
94., 283., 64., 102., 200., 265., 94., 230., 181., 156., 233.,
60., 219., 80., 68., 332., 248., 84., 200., 55., 85., 89.,
31., 129., 83., 275., 65., 198., 236., 253., 124., 44., 172.,
114., 142., 109., 180., 144., 163., 147., 97., 220., 190., 109.,
191., 122., 230., 242., 248., 249., 192., 131., 237., 78., 135.,
244., 199., 270., 164., 72., 96., 306., 91., 214., 95., 216.,
263., 178., 113., 200., 139., 139., 88., 148., 88., 243., 71.,
77., 109., 272., 60., 54., 221., 90., 311., 281., 182., 321.,
58., 262., 206., 233., 242., 123., 167., 63., 197., 71., 168.,
140., 217., 121., 235., 245., 40., 52., 104., 132., 88., 69.,
219., 72., 201., 110., 51., 277., 63., 118., 69., 273., 258.,
43., 198., 242., 232., 175., 93., 168., 275., 293., 281., 72.,
140., 189., 181., 209., 136., 261., 113., 131., 174., 257., 55.,
84., 42., 146., 212., 233., 91., 111., 152., 120., 67., 310.,
94., 183., 66., 173., 72., 49., 64., 48., 178., 104., 132.,
220., 57.])
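The ccp_alpha parameter documented above enables Minimal Cost-Complexity Pruning. The candidate alpha values for this training set can be inspected with cost_complexity_pruning_path; a minimal sketch, assuming the standard scikit-learn pruning API:
# Sketch: effective alphas and total leaf impurities along the pruning path.
# Larger ccp_alpha values prune more nodes; ccp_alpha=0.0 keeps the full tree.
path = DecisionTreeRegressor(random_state=0).cost_complexity_pruning_path(X, y)
path.ccp_alphas[:5], path.impurities[:5]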
[4]:
decisionTreeRegressor.feature_importances_
[4]:
array([0.04278675, 0.00902901, 0.23099855, 0.08696601, 0.08256838,
0.04880831, 0.06316078, 0.01869221, 0.34793949, 0.06905048])
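feature_importances_ follows the column order of X. A small sketch that pairs each importance with its feature name, assuming the feature_names attribute returned by load_diabetes (not used above):
# Sketch: rank the diabetes features by impurity-based importance.
features = load_diabetes()
sorted(
    zip(features.feature_names, decisionTreeRegressor.feature_importances_),
    key=lambda item: item[1],
    reverse=True,
)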