Skip to content

Commit ae2eb70

Browse files
authored
fix: insert user imports before runtime site-packages (#2073)
Previously, all the user import paths were put at the end of sys.path. This was done so that user import paths didn't hide stdlib modules. However, a side-effect is that user import paths came after the runtime's site-packages directory. This prevented user imports from overriding non-stdlib modules included in a runtime (e.g. pip). To fix, we look for the runtime site-packages directory, then insert the user import paths before it. A test is used to ensure that the ordering is `[stdlib, user, runtime site-packages]` Also fixes a bug introduced by #2076: safe path was being disabled by default Fixes #2064
1 parent ecad092 commit ae2eb70

7 files changed

+207
-25
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ A brief description of the categories of changes:
3535
Windows. See [#1840](https://github.com/bazelbuild/rules_python/issues/1840).
3636
* (rules) Fixes Mac + `--build_python_zip` + {obj}`--bootstrap_impl=script`
3737
([#2030](https://github.com/bazelbuild/rules_python/issues/2030)).
38+
* (rules) User dependencies come before runtime site-packages when using
39+
{obj}`--bootstrap_impl=script`.
40+
([#2064](https://github.com/bazelbuild/rules_python/issues/2064)).
3841
* (pip) Fixed pypi parse_simpleapi_html function for feeds with package metadata
3942
containing ">" sign
4043

python/private/stage1_bootstrap_template.sh

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,11 +106,13 @@ declare -a interpreter_args
106106
# See: https://docs.python.org/3.11/using/cmdline.html#envvar-PYTHONSAFEPATH
107107
# NOTE: Only works for 3.11+
108108
# We inherit the value from the outer environment in case the user wants to
109-
# opt-out of using PYTHONSAFEPATH.
110-
# Because empty means false and non-empty means true, we have to distinguish
111-
# between "defined and empty" and "not defined at all".
109+
# opt-out of using PYTHONSAFEPATH. To opt-out, they have to set
110+
# `PYTHONSAFEPATH=` (empty string). This is because Python treats the empty
111+
# value as false, and any non-empty value as true.
112+
# ${FOO+WORD} expands to empty if $FOO is undefined, and WORD otherwise.
112113
if [[ -z "${PYTHONSAFEPATH+x}" ]]; then
113-
interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH+1}")
114+
# ${FOO-WORD} expands to WORD if $FOO is undefined, and $FOO otherwise
115+
interpreter_env+=("PYTHONSAFEPATH=${PYTHONSAFEPATH-1}")
114116
fi
115117

116118
if [[ "$IS_ZIPFILE" == "1" ]]; then

python/private/stage2_bootstrap_template.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,9 +498,29 @@ def main():
498498
cov_tool = None
499499

500500
sys.stdout.flush()
501+
502+
# Add the user imports after the stdlib, but before the runtime's
503+
# site-packages directory. This gives the stdlib precedence, while allowing
504+
# users to override non-stdlib packages that may have been bundled with
505+
# the runtime (usually pip).
506+
# NOTE: There isn't a good way to identify the stdlib paths, so we just
507+
# expect site-packages comes after it, per
508+
# https://docs.python.org/3/library/sys_path_init.html#sys-path-init
509+
for i, path in enumerate(sys.path):
510+
# dist-packages is a debian convention, see
511+
# https://wiki.debian.org/Python#Deviations_from_upstream
512+
if os.path.basename(path) in ("site-packages", "dist-packages"):
513+
sys.path[i:i] = python_path_entries
514+
break
515+
else:
516+
# Otherwise, no site-packages directory was found, which is odd but ok.
517+
sys.path.extend(python_path_entries)
518+
501519
# NOTE: The sys.path must be modified before coverage is imported/activated
520+
# NOTE: Perform this after the user imports are appended. This avoids a
521+
# user import accidentally triggering the site-packages logic above.
502522
sys.path[0:0] = prepend_path_entries
503-
sys.path.extend(python_path_entries)
523+
504524
with _maybe_collect_coverage(enable=cov_tool is not None):
505525
# The first arg is this bootstrap, so drop that for the re-invocation.
506526
_run_py(main_filename, args=sys.argv[1:])

tests/base_rules/BUILD.bazel

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414

1515
load("//python/private:util.bzl", "IS_BAZEL_7_OR_HIGHER") # buildifier: disable=bzl-visibility
16-
load("//tests/support:sh_py_run_test.bzl", "sh_py_run_test")
16+
load("//tests/support:sh_py_run_test.bzl", "py_reconfig_test", "sh_py_run_test")
1717

1818
_SUPPORTS_BOOTSTRAP_SCRIPT = select({
1919
"@platforms//os:windows": ["@platforms//:incompatible"],
@@ -52,6 +52,25 @@ sh_py_run_test(
5252
target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
5353
)
5454

55+
py_reconfig_test(
56+
name = "sys_path_order_bootstrap_script_test",
57+
srcs = ["sys_path_order_test.py"],
58+
bootstrap_impl = "script",
59+
env = {"BOOTSTRAP": "script"},
60+
imports = ["./site-packages"],
61+
main = "sys_path_order_test.py",
62+
target_compatible_with = _SUPPORTS_BOOTSTRAP_SCRIPT,
63+
)
64+
65+
py_reconfig_test(
66+
name = "sys_path_order_bootstrap_system_python_test",
67+
srcs = ["sys_path_order_test.py"],
68+
bootstrap_impl = "system_python",
69+
env = {"BOOTSTRAP": "system_python"},
70+
imports = ["./site-packages"],
71+
main = "sys_path_order_test.py",
72+
)
73+
5574
sh_py_run_test(
5675
name = "inherit_pythonsafepath_env_test",
5776
bootstrap_impl = "script",

tests/base_rules/inherit_pythonsafepath_env_test.sh

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,35 @@ function expect_match() {
3535
local expected_pattern=$1
3636
local actual=$2
3737
if ! (echo "$actual" | grep "$expected_pattern" ) >/dev/null; then
38-
echo "expected output to match: $expected_pattern"
39-
echo "but got:\n$actual"
38+
echo "expected to match: $expected_pattern"
39+
echo "===== actual START ====="
40+
echo "$actual"
41+
echo "===== actual END ====="
42+
echo
43+
touch EXPECTATION_FAILED
4044
return 1
4145
fi
4246
}
4347

4448

49+
echo "Check inherited and disabled"
50+
# Verify setting it to empty string disables safe path
4551
actual=$(PYTHONSAFEPATH= $bin 2>&1)
4652
expect_match "sys.flags.safe_path: False" "$actual"
4753
expect_match "PYTHONSAFEPATH: EMPTY" "$actual"
4854

55+
echo "Check inherited and propagated"
56+
# Verify setting it to any string enables safe path and that
57+
# value is propagated
4958
actual=$(PYTHONSAFEPATH=OUTER $bin 2>&1)
5059
expect_match "sys.flags.safe_path: True" "$actual"
5160
expect_match "PYTHONSAFEPATH: OUTER" "$actual"
61+
62+
echo "Check enabled by default"
63+
# Verifying doing nothing leaves safepath enabled by default
64+
actual=$($bin 2>&1)
65+
expect_match "sys.flags.safe_path: True" "$actual"
66+
expect_match "PYTHONSAFEPATH: 1" "$actual"
67+
68+
# Exit if any of the expects failed
69+
[[ ! -e EXPECTATION_FAILED ]]
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Copyright 2024 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os.path
16+
import re
17+
import sys
18+
import unittest
19+
20+
21+
class SysPathOrderTest(unittest.TestCase):
22+
def test_sys_path_order(self):
23+
last_stdlib = None
24+
first_user = None
25+
first_runtime_site = None
26+
27+
# Classify paths into the three different types we care about: stdlib,
28+
# user dependency, or the runtime's site-package's directory.
29+
#
30+
# Because they often share common prefixes with one another, and vary
31+
# subtly between platforms, we do this in two passes: first categorize,
32+
# then pick out the indexes. This is just so debugging is easier and
33+
# error messages are more informative.
34+
categorized_paths = []
35+
for i, value in enumerate(sys.path):
36+
# The runtime's root repo may be added to sys.path, but it
37+
# counts as a user directory, not stdlib directory.
38+
if value == sys.prefix:
39+
category = "user"
40+
elif value.startswith(sys.prefix):
41+
# The runtime's site-package directory might be called
42+
# dist-packages when using Debian's system python.
43+
if os.path.basename(value).endswith("-packages"):
44+
category = "runtime-site"
45+
else:
46+
category = "stdlib"
47+
else:
48+
category = "user"
49+
50+
categorized_paths.append((category, value))
51+
52+
for i, (category, _) in enumerate(categorized_paths):
53+
if category == "stdlib":
54+
last_stdlib = i
55+
elif category == "runtime-site":
56+
if first_runtime_site is None:
57+
first_runtime_site = i
58+
elif category == "user":
59+
if first_user is None:
60+
first_user = i
61+
62+
sys_path_str = "\n".join(
63+
f"{i}: ({category}) {value}"
64+
for i, (category, value) in enumerate(categorized_paths)
65+
)
66+
if None in (last_stdlib, first_user, first_runtime_site):
67+
self.fail(
68+
"Failed to find position for one of:\n"
69+
+ f"{last_stdlib=} {first_user=} {first_runtime_site=}\n"
70+
+ f"for sys.path:\n{sys_path_str}"
71+
)
72+
73+
if os.environ["BOOTSTRAP"] == "script":
74+
self.assertTrue(
75+
last_stdlib < first_user < first_runtime_site,
76+
f"Expected {last_stdlib=} < {first_user=} < {first_runtime_site=}\n"
77+
+ f"for sys.path:\n{sys_path_str}",
78+
)
79+
else:
80+
self.assertTrue(
81+
first_user < last_stdlib < first_runtime_site,
82+
f"Expected {first_user=} < {last_stdlib=} < {first_runtime_site=}\n"
83+
+ f"for sys.path:\n{sys_path_str}",
84+
)
85+
86+
87+
if __name__ == "__main__":
88+
unittest.main()

tests/support/sh_py_run_test.bzl

Lines changed: 49 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ without the overhead of a bazel-in-bazel integration test.
1818
"""
1919

2020
load("//python:py_binary.bzl", "py_binary")
21+
load("//python:py_test.bzl", "py_test")
2122

2223
def _perform_transition_impl(input_settings, attr):
2324
settings = dict(input_settings)
@@ -37,7 +38,7 @@ _perform_transition = transition(
3738
],
3839
)
3940

40-
def _transition_impl(ctx):
41+
def _py_reconfig_impl(ctx):
4142
default_info = ctx.attr.target[DefaultInfo]
4243
exe_ext = default_info.files_to_run.executable.extension
4344
if exe_ext:
@@ -66,27 +67,58 @@ def _transition_impl(ctx):
6667
DefaultInfo(
6768
executable = executable,
6869
files = depset(default_outputs),
69-
runfiles = default_info.default_runfiles,
70+
# On windows, the other default outputs must also be included
71+
# in runfiles so the exe launcher can find the backing file.
72+
runfiles = ctx.runfiles(default_outputs).merge(
73+
default_info.default_runfiles,
74+
),
7075
),
7176
testing.TestEnvironment(
7277
environment = ctx.attr.env,
7378
),
7479
]
7580

76-
transition_binary = rule(
77-
implementation = _transition_impl,
78-
attrs = {
79-
"bootstrap_impl": attr.string(),
80-
"build_python_zip": attr.string(default = "auto"),
81-
"env": attr.string_dict(),
82-
"target": attr.label(executable = True, cfg = "target"),
83-
"_allowlist_function_transition": attr.label(
84-
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
85-
),
86-
},
87-
cfg = _perform_transition,
88-
executable = True,
89-
)
81+
def _make_reconfig_rule(**kwargs):
82+
return rule(
83+
implementation = _py_reconfig_impl,
84+
attrs = {
85+
"bootstrap_impl": attr.string(),
86+
"build_python_zip": attr.string(default = "auto"),
87+
"env": attr.string_dict(),
88+
"target": attr.label(executable = True, cfg = "target"),
89+
"_allowlist_function_transition": attr.label(
90+
default = "@bazel_tools//tools/allowlists/function_transition_allowlist",
91+
),
92+
},
93+
cfg = _perform_transition,
94+
**kwargs
95+
)
96+
97+
_py_reconfig_binary = _make_reconfig_rule(executable = True)
98+
99+
_py_reconfig_test = _make_reconfig_rule(test = True)
100+
101+
def py_reconfig_test(*, name, **kwargs):
102+
"""Create a py_test with customized build settings for testing.
103+
104+
Args:
105+
name: str, name of teset target.
106+
**kwargs: kwargs to pass along to _py_reconfig_test and py_test.
107+
"""
108+
reconfig_kwargs = {}
109+
reconfig_kwargs["bootstrap_impl"] = kwargs.pop("bootstrap_impl")
110+
reconfig_kwargs["env"] = kwargs.get("env")
111+
inner_name = "_{}_inner" + name
112+
_py_reconfig_test(
113+
name = name,
114+
target = inner_name,
115+
**reconfig_kwargs
116+
)
117+
py_test(
118+
name = inner_name,
119+
tags = ["manual"],
120+
**kwargs
121+
)
90122

91123
def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
92124
bin_name = "_{}_bin".format(name)
@@ -102,7 +134,7 @@ def sh_py_run_test(*, name, sh_src, py_src, **kwargs):
102134
},
103135
)
104136

105-
transition_binary(
137+
_py_reconfig_binary(
106138
name = bin_name,
107139
tags = ["manual"],
108140
target = "_{}_plain_bin".format(name),

0 commit comments

Comments
 (0)