Skip to content

Commit

Permalink
[SPARK-29672][PYSPARK] update spark testing framework to use python3
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Remove the Python 2.7 tests and test infrastructure for 3.0+, so that the testing framework runs on Python 3 only.

### Why are the changes needed?

Because Python 2.7 is finally going the way of the dodo (it is reaching end of life).

### Does this PR introduce any user-facing change?

No.

### How was this patch tested?

The build system will test this change.

Closes apache#26330 from shaneknapp/remove-py27-tests.

Lead-authored-by: shane knapp <[email protected]>
Co-authored-by: shane <[email protected]>
Signed-off-by: shane knapp <[email protected]>
  • Loading branch information
shaneknapp committed Nov 14, 2019
1 parent e46e487 commit 04e99c1
Show file tree
Hide file tree
Showing 12 changed files with 43 additions and 46 deletions.
2 changes: 0 additions & 2 deletions dev/pip-sanity-check.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# limitations under the License.
#

from __future__ import print_function

from pyspark.sql import SparkSession
from pyspark.mllib.linalg import *
import sys
Expand Down
23 changes: 9 additions & 14 deletions dev/run-pip-tests
Original file line number Diff line number Diff line change
Expand Up @@ -39,21 +39,16 @@ PYTHON_EXECS=()
# Some systems don't have pip or virtualenv - in those cases our tests won't work.
if hash virtualenv 2>/dev/null && [ ! -n "$USE_CONDA" ]; then
echo "virtualenv installed - using. Note if this is a conda virtual env you may wish to set USE_CONDA"
# Figure out which Python execs we should test pip installation with
if hash python2 2>/dev/null; then
# We do this since we are testing with virtualenv and the default virtual env python
# is in /usr/bin/python
PYTHON_EXECS+=('python2')
elif hash python 2>/dev/null; then
# If python2 isn't installed fallback to python if available
PYTHON_EXECS+=('python')
fi
# test only against python3
if hash python3 2>/dev/null; then
PYTHON_EXECS+=('python3')
PYTHON_EXECS=('python3')
else
echo "Python3 not installed on system, skipping pip installability tests"
exit 0
fi
elif hash conda 2>/dev/null; then
echo "Using conda virtual environments"
PYTHON_EXECS=('3.5')
PYTHON_EXECS=('3.6')
USE_CONDA=1
else
echo "Missing virtualenv & conda, skipping pip installability tests"
Expand Down Expand Up @@ -97,7 +92,7 @@ for python in "${PYTHON_EXECS[@]}"; do
cd "$FWDIR"/python
# Delete the egg info file if it exists, this can cache the setup file.
rm -rf pyspark.egg-info || echo "No existing egg info file, skipping deletion"
python setup.py sdist
python3 setup.py sdist


echo "Installing dist into virtual env"
Expand All @@ -117,9 +112,9 @@ for python in "${PYTHON_EXECS[@]}"; do
echo "Run basic sanity check on pip installed version with spark-submit"
spark-submit "$FWDIR"/dev/pip-sanity-check.py
echo "Run basic sanity check with import based"
python "$FWDIR"/dev/pip-sanity-check.py
python3 "$FWDIR"/dev/pip-sanity-check.py
echo "Run the tests for context.py"
python "$FWDIR"/python/pyspark/context.py
python3 "$FWDIR"/python/pyspark/context.py

cd "$FWDIR"

Expand Down
6 changes: 3 additions & 3 deletions dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@
FWDIR="$(cd "`dirname $0`"/..; pwd)"
cd "$FWDIR"

PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))')
PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))')
if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
echo "Python versions prior to 2.7 are not supported."
echo "Python versions prior to 3.6 are not supported."
exit -1
fi

exec python -u ./dev/run-tests.py "$@"
exec python3 -u ./dev/run-tests.py "$@"
8 changes: 5 additions & 3 deletions dev/run-tests-jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@
FWDIR="$( cd "$( dirname "$0" )/.." && pwd )"
cd "$FWDIR"

PYTHON_VERSION_CHECK=$(python -c 'import sys; print(sys.version_info < (2, 7, 0))')
export PATH=/home/anaconda/envs/py36/bin:$PATH

PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))')
if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
echo "Python versions prior to 2.7 are not supported."
echo "Python versions prior to 3.6 are not supported."
exit -1
fi

exec python -u ./dev/run-tests-jenkins.py "$@"
exec python3 -u ./dev/run-tests-jenkins.py "$@"
3 changes: 1 addition & 2 deletions dev/run-tests-jenkins.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -17,7 +17,6 @@
# limitations under the License.
#

from __future__ import print_function
import os
import sys
import json
Expand Down
5 changes: 2 additions & 3 deletions dev/run-tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -17,7 +17,6 @@
# limitations under the License.
#

from __future__ import print_function
import itertools
from argparse import ArgumentParser
import os
Expand Down Expand Up @@ -265,7 +264,7 @@ def exec_sbt(sbt_args=()):
echo_proc.wait()
for line in iter(sbt_proc.stdout.readline, b''):
if not sbt_output_filter.match(line):
print(line, end='')
print(line.decode('utf-8'), end='')
retcode = sbt_proc.wait()

if retcode != 0:
Expand Down
6 changes: 2 additions & 4 deletions dev/sparktestsupport/shellutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,12 @@
# limitations under the License.
#

from __future__ import print_function
import os
import shutil
import subprocess
import sys

subprocess_check_output = subprocess.check_output
subprocess_check_call = subprocess.check_call


def exit_from_command_with_retcode(cmd, retcode):
Expand Down Expand Up @@ -55,9 +53,9 @@ def run_cmd(cmd, return_output=False):
cmd = cmd.split()
try:
if return_output:
return subprocess_check_output(cmd).decode(sys.getdefaultencoding())
return subprocess_check_output(cmd).decode('utf-8')
else:
return subprocess_check_call(cmd)
return subprocess.run(cmd, universal_newlines=True, check=True)
except subprocess.CalledProcessError as e:
exit_from_command_with_retcode(e.cmd, e.returncode)

Expand Down
2 changes: 0 additions & 2 deletions python/pyspark/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# limitations under the License.
#

from __future__ import print_function

import os
import shutil
import signal
Expand Down
2 changes: 1 addition & 1 deletion python/pyspark/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
Expand Down
8 changes: 7 additions & 1 deletion python/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,10 @@
FWDIR="$(cd "`dirname $0`"/..; pwd)"
cd "$FWDIR"

exec python -u ./python/run-tests.py "$@"
PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))')
if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then
echo "Python versions prior to 3.6 are not supported."
exit -1
fi

exec python3 -u ./python/run-tests.py "$@"
17 changes: 10 additions & 7 deletions python/run-tests.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -17,7 +17,6 @@
# limitations under the License.
#

from __future__ import print_function
import logging
from argparse import ArgumentParser
import os
Expand Down Expand Up @@ -160,11 +159,15 @@ def run_individual_python_test(target_dir, test_name, pyspark_python):


def get_default_python_executables():
python_execs = [x for x in ["python2.7", "python3.6", "pypy"] if which(x)]
if "python2.7" not in python_execs:
LOGGER.warning("Not testing against `python2.7` because it could not be found; falling"
" back to `python` instead")
python_execs.insert(0, "python")
python_execs = [x for x in ["python3.6", "python2.7", "pypy"] if which(x)]

if "python3.6" not in python_execs:
p = which("python3")
if not p:
LOGGER.error("No python3 executable found. Exiting!")
os._exit(1)
else:
python_execs.insert(0, p)
return python_execs


Expand Down
7 changes: 3 additions & 4 deletions python/setup.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3

#
# Licensed to the Apache Software Foundation (ASF) under one or more
Expand All @@ -16,15 +16,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import glob
import os
import sys
from setuptools import setup
from shutil import copyfile, copytree, rmtree

if sys.version_info < (2, 7):
print("Python versions prior to 2.7 are not supported for pip installed PySpark.",
if sys.version_info < (3, 6):
print("Python versions prior to 3.6 are not supported for pip installed PySpark.",
file=sys.stderr)
sys.exit(-1)

Expand Down

0 comments on commit 04e99c1

Please sign in to comment.