Skip to content

Commit fc33d30

Browse files
rth authored and NicolasHug committed
MAINT Unvendor joblib (scikit-learn#13531)
1 parent 7243cc3 commit fc33d30

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+61
-14071
lines changed

README.rst

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ scikit-learn requires:
5050
- Python (>= 3.5)
5151
- NumPy (>= 1.11.0)
5252
- SciPy (>= 0.17.0)
53+
- joblib (>= 0.11)
5354

5455
**Scikit-learn 0.20 was the last version to support Python2.7.**
5556
Scikit-learn 0.21 and later require Python 3.5 or newer.

azure-pipelines.yml

+2-3
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ jobs:
1111
py35_np_atlas:
1212
DISTRIB: 'ubuntu'
1313
PYTHON_VERSION: '3.5'
14-
SKLEARN_SITE_JOBLIB: '1'
1514
JOBLIB_VERSION: '0.11'
1615
SKLEARN_NO_OPENMP: 'True'
1716
# Linux + Python 3.5 build with OpenBLAS and without SITE_JOBLIB
@@ -23,10 +22,11 @@ jobs:
2322
SCIPY_VERSION: '0.17.0'
2423
CYTHON_VERSION: '*'
2524
PILLOW_VERSION: '4.0.0'
25+
# later versions of joblib are not packaged in conda for Python 3.5
26+
JOBLIB_VERSION: '0.12.3'
2627
COVERAGE: 'true'
2728
# Linux environment to test the latest available dependencies and MKL.
2829
# It runs tests requiring pandas and PyAMG.
29-
# It also runs with the site joblib instead of the vendored copy of joblib.
3030
pylatest_conda:
3131
DISTRIB: 'conda'
3232
PYTHON_VERSION: '*'
@@ -41,7 +41,6 @@ jobs:
4141
COVERAGE: 'true'
4242
CHECK_PYTEST_SOFT_DEPENDENCY: 'true'
4343
TEST_DOCSTRINGS: 'true'
44-
SKLEARN_SITE_JOBLIB: '1'
4544
CHECK_WARNINGS: 'true'
4645

4746
- template: build_tools/azure/posix.yml

build_tools/azure/install.cmd

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ IF "%PYTHON_ARCH%"=="64" (
1111
call deactivate
1212
@rem Clean up any left-over from a previous build
1313
conda remove --all -q -y -n %VIRTUALENV%
14-
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython pytest wheel pillow
14+
conda create -n %VIRTUALENV% -q -y python=%PYTHON_VERSION% numpy scipy cython pytest wheel pillow joblib
1515

1616
call activate %VIRTUALENV%
1717
) else (
18-
pip install numpy scipy cython pytest wheel pillow
18+
pip install numpy scipy cython pytest wheel pillow joblib
1919
)
2020
if "%COVERAGE%" == "true" (
2121
pip install coverage codecov pytest-cov

build_tools/azure/install.sh

+1-5
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ make_conda() {
2727
if [[ "$DISTRIB" == "conda" ]]; then
2828
TO_INSTALL="python=$PYTHON_VERSION pip pytest pytest-cov \
2929
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION \
30-
cython=$CYTHON_VERSION"
30+
cython=$CYTHON_VERSION joblib=$JOBLIB_VERSION"
3131

3232
if [[ "$INSTALL_MKL" == "true" ]]; then
3333
TO_INSTALL="$TO_INSTALL mkl"
@@ -47,10 +47,6 @@ if [[ "$DISTRIB" == "conda" ]]; then
4747
TO_INSTALL="$TO_INSTALL pillow=$PILLOW_VERSION"
4848
fi
4949

50-
if [[ -n "$JOBLIB_VERSION" ]]; then
51-
TO_INSTALL="$TO_INSTALL joblib=$JOBLIB_VERSION"
52-
fi
53-
5450
make_conda $TO_INSTALL
5551

5652
elif [[ "$DISTRIB" == "ubuntu" ]]; then

build_tools/travis/install.sh

-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ elif [[ "$DISTRIB" == "scipy-dev" ]]; then
113113
pip install --pre --upgrade --timeout=60 -f $dev_url numpy scipy pandas cython
114114
echo "Installing joblib master"
115115
pip install https://github.com/joblib/joblib/archive/master.zip
116-
export SKLEARN_SITE_JOBLIB=1
117116
echo "Installing pillow master"
118117
pip install https://github.com/python-pillow/Pillow/archive/master.zip
119118
pip install pytest pytest-cov

conftest.py

+10
Original file line numberDiff line numberDiff line change
@@ -61,3 +61,13 @@ def pytest_collection_modifyitems(config, items):
6161
for item in items:
6262
if isinstance(item, DoctestItem):
6363
item.add_marker(skip_marker)
64+
65+
66+
def pytest_configure(config):
67+
import sys
68+
sys._is_pytest_session = True
69+
70+
71+
def pytest_unconfigure(config):
72+
import sys
73+
del sys._is_pytest_session

doc/modules/computing.rst

+5
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,11 @@ These environment variables should be set before importing scikit-learn.
553553
is supported. In addition, dumps from joblib.Memory might be incompatible,
554554
and you might lose some caches and have to redownload some datasets.
555555

556+
.. deprecated:: 0.21
557+
558+
As of version 0.21 this parameter has no effect: the vendored joblib was
559+
removed and the site joblib is always used.
560+
556561
:SKLEARN_ASSUME_FINITE:
557562

558563
Sets the default value for the `assume_finite` argument of

doc/whats_new/v0.21.rst

+10
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,16 @@ Multiple modules
634634
:issue:`13422` by :user:`Madhura Parikh <jdnc>` and
635635
:user:`Clément Doumouro <ClemDoum>`.
636636

637+
638+
Dependencies
639+
............
640+
641+
- |Enhancement| Joblib is no longer vendored in scikit-learn, and becomes a
642+
dependency. The minimal supported version is joblib 0.11; however, using
643+
version >= 0.13 is strongly recommended.
644+
:issue:`13531` by :user:`Roman Yurchak <rth>`.
645+
646+
637647
Changes to estimator checks
638648
---------------------------
639649

setup.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
SCIPY_MIN_VERSION = '0.17.0'
5858
NUMPY_MIN_VERSION = '1.11.0'
5959

60+
JOBLIB_MIN_VERSION = '0.11'
6061

6162
# Optional setuptools features
6263
# We need to import setuptools early, if we want setuptools features,
@@ -226,7 +227,8 @@ def setup_package():
226227
cmdclass=cmdclass,
227228
install_requires=[
228229
'numpy>={}'.format(NUMPY_MIN_VERSION),
229-
'scipy>={}'.format(SCIPY_MIN_VERSION)
230+
'scipy>={}'.format(SCIPY_MIN_VERSION),
231+
'joblib>={}'.format(JOBLIB_MIN_VERSION)
230232
],
231233
**extra_setuptools_args)
232234

sklearn/externals/README

-3
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
This directory contains bundled external dependencies that are updated
22
every once in a while.
33

4-
Note to developers and advanced users: setting the SKLEARN_SITE_JOBLIB to
5-
a non null value will force scikit-learn to use the site joblib.
6-
74
Note for distribution packagers: if you want to remove the duplicated
85
code and depend on a packaged version, we suggest that you simply do a
96
symbolic link in this directory.

sklearn/externals/copy_joblib.sh

-26
This file was deleted.

sklearn/externals/joblib/__init__.py

+11-129
Original file line numberDiff line numberDiff line change
@@ -1,133 +1,15 @@
1-
"""Joblib is a set of tools to provide **lightweight pipelining in
2-
Python**. In particular:
1+
# Import necessary to preserve backward compatibility of pickles
2+
import sys
3+
import warnings
34

4-
1. transparent disk-caching of functions and lazy re-evaluation
5-
(memoize pattern)
5+
from joblib import *
66

7-
2. easy simple parallel computing
87

9-
Joblib is optimized to be **fast** and **robust** in particular on large
10-
data and has specific optimizations for `numpy` arrays. It is
11-
**BSD-licensed**.
8+
msg = ("sklearn.externals.joblib is deprecated in 0.21 and will be removed "
9+
"in 0.23. Please import this functionality directly from joblib, "
10+
"which can be installed with: pip install joblib. If this warning is "
11+
"raised when loading pickled models, you may need to re-serialize "
12+
"those models with scikit-learn 0.21+.")
1213

13-
14-
==================== ===============================================
15-
**Documentation:** https://joblib.readthedocs.io
16-
17-
**Download:** http://pypi.python.org/pypi/joblib#downloads
18-
19-
**Source code:** http://github.com/joblib/joblib
20-
21-
**Report issues:** http://github.com/joblib/joblib/issues
22-
==================== ===============================================
23-
24-
25-
Vision
26-
--------
27-
28-
The vision is to provide tools to easily achieve better performance and
29-
reproducibility when working with long running jobs.
30-
31-
* **Avoid computing twice the same thing**: code is rerun over an
32-
over, for instance when prototyping computational-heavy jobs (as in
33-
scientific development), but hand-crafted solution to alleviate this
34-
issue is error-prone and often leads to unreproducible results
35-
36-
* **Persist to disk transparently**: persisting in an efficient way
37-
arbitrary objects containing large data is hard. Using
38-
joblib's caching mechanism avoids hand-written persistence and
39-
implicitly links the file on disk to the execution context of
40-
the original Python object. As a result, joblib's persistence is
41-
good for resuming an application status or computational job, eg
42-
after a crash.
43-
44-
Joblib addresses these problems while **leaving your code and your flow
45-
control as unmodified as possible** (no framework, no new paradigms).
46-
47-
Main features
48-
------------------
49-
50-
1) **Transparent and fast disk-caching of output value:** a memoize or
51-
make-like functionality for Python functions that works well for
52-
arbitrary Python objects, including very large numpy arrays. Separate
53-
persistence and flow-execution logic from domain logic or algorithmic
54-
code by writing the operations as a set of steps with well-defined
55-
inputs and outputs: Python functions. Joblib can save their
56-
computation to disk and rerun it only if necessary::
57-
58-
>>> from sklearn.externals.joblib import Memory
59-
>>> cachedir = 'your_cache_dir_goes_here'
60-
>>> mem = Memory(cachedir)
61-
>>> import numpy as np
62-
>>> a = np.vander(np.arange(3)).astype(np.float)
63-
>>> square = mem.cache(np.square)
64-
>>> b = square(a) # doctest: +ELLIPSIS
65-
________________________________________________________________________________
66-
[Memory] Calling square...
67-
square(array([[0., 0., 1.],
68-
[1., 1., 1.],
69-
[4., 2., 1.]]))
70-
___________________________________________________________square - 0...s, 0.0min
71-
72-
>>> c = square(a)
73-
>>> # The above call did not trigger an evaluation
74-
75-
2) **Embarrassingly parallel helper:** to make it easy to write readable
76-
parallel code and debug it quickly::
77-
78-
>>> from sklearn.externals.joblib import Parallel, delayed
79-
>>> from math import sqrt
80-
>>> Parallel(n_jobs=1)(delayed(sqrt)(i**2) for i in range(10))
81-
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
82-
83-
84-
3) **Fast compressed Persistence**: a replacement for pickle to work
85-
efficiently on Python objects containing large data (
86-
*joblib.dump* & *joblib.load* ).
87-
88-
..
89-
>>> import shutil ; shutil.rmtree(cachedir)
90-
91-
"""
92-
93-
# PEP0440 compatible formatted version, see:
94-
# https://www.python.org/dev/peps/pep-0440/
95-
#
96-
# Generic release markers:
97-
# X.Y
98-
# X.Y.Z # For bugfix releases
99-
#
100-
# Admissible pre-release markers:
101-
# X.YaN # Alpha release
102-
# X.YbN # Beta release
103-
# X.YrcN # Release Candidate
104-
# X.Y # Final release
105-
#
106-
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
107-
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
108-
#
109-
__version__ = '0.13.0'
110-
111-
112-
from .memory import Memory, MemorizedResult, register_store_backend
113-
from .logger import PrintTime
114-
from .logger import Logger
115-
from .hashing import hash
116-
from .numpy_pickle import dump
117-
from .numpy_pickle import load
118-
from .compressor import register_compressor
119-
from .parallel import Parallel
120-
from .parallel import delayed
121-
from .parallel import cpu_count
122-
from .parallel import register_parallel_backend
123-
from .parallel import parallel_backend
124-
from .parallel import effective_n_jobs
125-
126-
from .externals.loky import wrap_non_picklable_objects
127-
128-
129-
__all__ = ['Memory', 'MemorizedResult', 'PrintTime', 'Logger', 'hash', 'dump',
130-
'load', 'Parallel', 'delayed', 'cpu_count', 'effective_n_jobs',
131-
'register_parallel_backend', 'parallel_backend',
132-
'register_store_backend', 'register_compressor',
133-
'wrap_non_picklable_objects']
14+
if not hasattr(sys, "_is_pytest_session"):
15+
warnings.warn(msg, category=DeprecationWarning)

sklearn/externals/joblib/_compat.py

-19
This file was deleted.

0 commit comments

Comments
 (0)