aboutsummaryrefslogtreecommitdiffstats
path: root/community/py3-scikit-optimize/sklearn-0.24.patch
blob: 278763d47b1270bfd6f8dd1b9866f3ff22052a2f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
From 991ba596ffe515b2e972f11bb7cb4818eeb4bead Mon Sep 17 00:00:00 2001
From: Kernc <kerncece@gmail.com>
Date: Mon, 11 Jan 2021 08:28:24 +0100
Subject: [PATCH 1/4] Remove BayesSearchCV(iid=) parameter deprecated in
 sklearn

Fixes https://github.com/scikit-optimize/scikit-optimize/issues/978
---
 skopt/searchcv.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 342952c2..500736f4 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -1,3 +1,5 @@
+import warnings
+
 try:
     from collections.abc import Sized
 except ImportError:
@@ -13,7 +15,6 @@
 from joblib import Parallel, delayed
 from sklearn.model_selection._search import BaseSearchCV
 from sklearn.utils import check_random_state
-from sklearn.utils.fixes import MaskedArray
 
 from sklearn.utils.validation import indexable, check_is_fitted
 try:
@@ -115,11 +116,6 @@ class BayesSearchCV(BaseSearchCV):
             - A string, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
 
-    iid : boolean, default=True
-        If True, the data is assumed to be identically distributed across
-        the folds, and the loss minimized is the total loss per sample,
-        and not the mean loss across the folds.
-
     cv : int, cross-validation generator or an iterable, optional
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -289,7 +285,7 @@ class BayesSearchCV(BaseSearchCV):
 
     def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
                  n_iter=50, scoring=None, fit_params=None, n_jobs=1,
-                 n_points=1, iid=True, refit=True, cv=None, verbose=0,
+                 n_points=1, iid='deprecated', refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', random_state=None,
                  error_score='raise', return_train_score=False):
 
@@ -305,9 +301,13 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
         # in the constructor and be passed in ``fit``.
         self.fit_params = fit_params
 
+        if iid != "deprecated":
+            warnings.warn("The `iid` parameter has been deprecated "
+                          "and will be unused.")
+
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,
-             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+             n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
              pre_dispatch=pre_dispatch, error_score=error_score,
              return_train_score=return_train_score)
 
@@ -456,13 +456,7 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                 results["rank_%s" % key_name] = np.asarray(
                     rankdata(-array_means, method='min'), dtype=np.int32)
 
-        # Computed the (weighted) mean and std for test scores alone
-        # NOTE test_sample counts (weights) remain the same for all candidates
-        test_sample_counts = np.array(test_sample_counts[:n_splits],
-                                      dtype=np.int)
-
-        _store('test_score', test_scores, splits=True, rank=True,
-               weights=test_sample_counts if self.iid else None)
+        _store('test_score', test_scores, splits=True, rank=True)
         if self.return_train_score:
             _store('train_score', train_scores, splits=True)
         _store('fit_time', fit_time)

From 294b7b8132091537fb263b04506a8c0809555cc8 Mon Sep 17 00:00:00 2001
From: Kernc <kerncece@gmail.com>
Date: Tue, 26 Jan 2021 13:52:44 +0100
Subject: [PATCH 2/4] Remove now unused weights= parameter

---
 skopt/searchcv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 500736f4..31315b90 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -435,7 +435,7 @@ def _fit(self, X, y, groups, parameter_iterable):
 
         results = dict()
 
-        def _store(key_name, array, weights=None, splits=False, rank=False):
+        def _store(key_name, array, splits=False, rank=False):
             """A small helper to store the scores/times to the cv_results_"""
             array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                               n_splits)
@@ -444,12 +444,12 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                     results["split%d_%s"
                             % (split_i, key_name)] = array[:, split_i]
 
-            array_means = np.average(array, axis=1, weights=weights)
+            array_means = np.average(array, axis=1)
             results['mean_%s' % key_name] = array_means
             # Weighted std is not directly available in numpy
             array_stds = np.sqrt(np.average((array -
                                              array_means[:, np.newaxis]) ** 2,
-                                            axis=1, weights=weights))
+                                            axis=1))
             results['std_%s' % key_name] = array_stds
 
             if rank:

From 3a0df875156e20a2849134e0583b5066b6c69508 Mon Sep 17 00:00:00 2001
From: kernc <kerncece@gmail.com>
Date: Tue, 26 Jan 2021 13:57:14 +0100
Subject: [PATCH 3/4] Update skopt/searchcv.py

Co-authored-by: Tim Head <betatim@gmail.com>
---
 skopt/searchcv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 31315b90..ce8c121d 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -303,7 +303,7 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
 
         if iid != "deprecated":
             warnings.warn("The `iid` parameter has been deprecated "
-                          "and will be unused.")
+                          "and will be ignored.")
 
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,

From 3bbbfc2ac397fd97c04e24e44fcd55c8fa6a6de2 Mon Sep 17 00:00:00 2001
From: bole1 <bolenevod@yandex.ru>
Date: Tue, 23 Feb 2021 16:40:31 +0300
Subject: [PATCH 4/4] Fix BayesSearchCV repr/pprint; Fix changed
 sklearn.model_selection._validation._fit_and_score

* Update searchcv.py

Further improvements on kernc's previous commits. `iid` is removed entirely to prevent trouble with sklearn's pretty-printing utilities, which look up params when printing to the REPL. sklearn 0.24 changed the return value after CV, so the dict destructuring was updated accordingly (a little bit dirty).

* Update searchcv.py

* Revert unrelated changes

* PEP8 format; add comment

* Revert reverting "unrelated changes"

This is required to pass tests/test_searchcv.py
with scikit-learn 0.24+.

Co-authored-by: Kernc <kerncece@gmail.com>
---
 skopt/searchcv.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index ce8c121d..1e4e4e80 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -304,6 +304,7 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
         if iid != "deprecated":
             warnings.warn("The `iid` parameter has been deprecated "
                           "and will be ignored.")
+        self.iid = iid  # For sklearn repr pprint
 
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,
@@ -424,11 +425,13 @@ def _fit(self, X, y, groups, parameter_iterable):
 
         # if one choose to see train score, "out" will contain train score info
         if self.return_train_score:
-            (train_scores, test_scores, test_sample_counts,
-             fit_time, score_time, parameters) = zip(*out)
+            (_fit_fail, train_scores, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*[dic.values()
+                                                       for dic in out])
         else:
-            (test_scores, test_sample_counts,
-             fit_time, score_time, parameters) = zip(*out)
+            (_fit_fail, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*[dic.values()
+                                                       for dic in out])
 
         candidate_params = parameters[::n_splits]
         n_candidates = len(candidate_params)