From 991ba596ffe515b2e972f11bb7cb4818eeb4bead Mon Sep 17 00:00:00 2001
From: Kernc <kerncece@gmail.com>
Date: Mon, 11 Jan 2021 08:28:24 +0100
Subject: [PATCH 1/4] Remove BayesSearchCV(iid=) parameter deprecated in
 sklearn

Fixes https://github.com/scikit-optimize/scikit-optimize/issues/978
---
 skopt/searchcv.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 342952c2..500736f4 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -1,3 +1,5 @@
+import warnings
+
 try:
     from collections.abc import Sized
 except ImportError:
@@ -13,7 +15,6 @@
 from joblib import Parallel, delayed
 from sklearn.model_selection._search import BaseSearchCV
 from sklearn.utils import check_random_state
-from sklearn.utils.fixes import MaskedArray
 
 from sklearn.utils.validation import indexable, check_is_fitted
 try:
@@ -115,11 +116,6 @@ class BayesSearchCV(BaseSearchCV):
             - A string, giving an expression as a function of n_jobs,
               as in '2*n_jobs'
 
-    iid : boolean, default=True
-        If True, the data is assumed to be identically distributed across
-        the folds, and the loss minimized is the total loss per sample,
-        and not the mean loss across the folds.
-
     cv : int, cross-validation generator or an iterable, optional
         Determines the cross-validation splitting strategy.
         Possible inputs for cv are:
@@ -289,7 +285,7 @@ class BayesSearchCV(BaseSearchCV):
 
     def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
                  n_iter=50, scoring=None, fit_params=None, n_jobs=1,
-                 n_points=1, iid=True, refit=True, cv=None, verbose=0,
+                 n_points=1, iid='deprecated', refit=True, cv=None, verbose=0,
                  pre_dispatch='2*n_jobs', random_state=None,
                  error_score='raise', return_train_score=False):
 
@@ -305,9 +301,13 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
         # in the constructor and be passed in ``fit``.
         self.fit_params = fit_params
 
+        if iid != "deprecated":
+            warnings.warn("The `iid` parameter has been deprecated "
+                          "and will be unused.")
+
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,
-             n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
+             n_jobs=n_jobs, refit=refit, cv=cv, verbose=verbose,
              pre_dispatch=pre_dispatch, error_score=error_score,
              return_train_score=return_train_score)
 
@@ -456,13 +456,7 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                 results["rank_%s" % key_name] = np.asarray(
                     rankdata(-array_means, method='min'), dtype=np.int32)
 
-        # Computed the (weighted) mean and std for test scores alone
-        # NOTE test_sample counts (weights) remain the same for all candidates
-        test_sample_counts = np.array(test_sample_counts[:n_splits],
-                                      dtype=np.int)
-
-        _store('test_score', test_scores, splits=True, rank=True,
-               weights=test_sample_counts if self.iid else None)
+        _store('test_score', test_scores, splits=True, rank=True)
         if self.return_train_score:
             _store('train_score', train_scores, splits=True)
         _store('fit_time', fit_time)

From 294b7b8132091537fb263b04506a8c0809555cc8 Mon Sep 17 00:00:00 2001
From: Kernc <kerncece@gmail.com>
Date: Tue, 26 Jan 2021 13:52:44 +0100
Subject: [PATCH 2/4] Remove now unused weights= parameter

---
 skopt/searchcv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 500736f4..31315b90 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -435,7 +435,7 @@ def _fit(self, X, y, groups, parameter_iterable):
 
         results = dict()
 
-        def _store(key_name, array, weights=None, splits=False, rank=False):
+        def _store(key_name, array, splits=False, rank=False):
             """A small helper to store the scores/times to the cv_results_"""
             array = np.array(array, dtype=np.float64).reshape(n_candidates,
                                                               n_splits)
@@ -444,12 +444,12 @@ def _store(key_name, array, weights=None, splits=False, rank=False):
                     results["split%d_%s"
                             % (split_i, key_name)] = array[:, split_i]
 
-            array_means = np.average(array, axis=1, weights=weights)
+            array_means = np.average(array, axis=1)
             results['mean_%s' % key_name] = array_means
             # Weighted std is not directly available in numpy
             array_stds = np.sqrt(np.average((array -
                                              array_means[:, np.newaxis]) ** 2,
-                                            axis=1, weights=weights))
+                                            axis=1))
             results['std_%s' % key_name] = array_stds
 
             if rank:

From 3a0df875156e20a2849134e0583b5066b6c69508 Mon Sep 17 00:00:00 2001
From: kernc <kerncece@gmail.com>
Date: Tue, 26 Jan 2021 13:57:14 +0100
Subject: [PATCH 3/4] Update skopt/searchcv.py

Co-authored-by: Tim Head <betatim@gmail.com>
---
 skopt/searchcv.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index 31315b90..ce8c121d 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -303,7 +303,7 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
 
         if iid != "deprecated":
             warnings.warn("The `iid` parameter has been deprecated "
-                          "and will be unused.")
+                          "and will be ignored.")
 
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,

From 3bbbfc2ac397fd97c04e24e44fcd55c8fa6a6de2 Mon Sep 17 00:00:00 2001
From: bole1 <bolenevod@yandex.ru>
Date: Tue, 23 Feb 2021 16:40:31 +0300
Subject: [PATCH 4/4] Fix BayesSearchCV repr/pprint; adapt to changed
 sklearn.model_selection._validation._fit_and_score

* Update searchcv.py

Further improvements on top of kernc's previous commits. The `iid` handling is reworked to prevent trouble with sklearn's pretty-printing utilities, which look up constructor params when the object is printed in the REPL. scikit-learn 0.24 also changed the return value of the cross-validation step, so the tuple unpacking is replaced with dict destructuring (a little bit dirty).

* Update searchcv.py

* Revert unrelated changes

* PEP8 format; add comment

* Revert reverting "unrelated changes"

This is required to pass tests/test_searchcv.py
with scikit-learn 0.24+.

Co-authored-by: Kernc <kerncece@gmail.com>
---
 skopt/searchcv.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/skopt/searchcv.py b/skopt/searchcv.py
index ce8c121d..1e4e4e80 100644
--- a/skopt/searchcv.py
+++ b/skopt/searchcv.py
@@ -304,6 +304,7 @@ def __init__(self, estimator, search_spaces, optimizer_kwargs=None,
         if iid != "deprecated":
             warnings.warn("The `iid` parameter has been deprecated "
                           "and will be ignored.")
+        self.iid = iid  # For sklearn repr pprint
 
         super(BayesSearchCV, self).__init__(
              estimator=estimator, scoring=scoring,
@@ -424,11 +425,13 @@ def _fit(self, X, y, groups, parameter_iterable):
 
         # if one choose to see train score, "out" will contain train score info
         if self.return_train_score:
-            (train_scores, test_scores, test_sample_counts,
-             fit_time, score_time, parameters) = zip(*out)
+            (_fit_fail, train_scores, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*[dic.values()
+                                                       for dic in out])
         else:
-            (test_scores, test_sample_counts,
-             fit_time, score_time, parameters) = zip(*out)
+            (_fit_fail, test_scores, test_sample_counts,
+             fit_time, score_time, parameters) = zip(*[dic.values()
+                                                       for dic in out])
 
         candidate_params = parameters[::n_splits]
         n_candidates = len(candidate_params)