Skip to content

Commit 5511adf

Browse files
authored
Generate non-conflicting qualified Python names (#1059)
Fixes #1045 Replacement for #1051
1 parent e05e06b commit 5511adf

File tree

4 files changed

+124
-14
lines changed

4 files changed

+124
-14
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
* The compiler will issue a warning when adding any alias that might conflict with any other alias (#1045)
1414

1515
### Fixed
16-
* Basilisp now respects the value of Python's `sys.dont_write_bytecode` flag when generating bytecode (#1054)
16+
* Fix a bug where Basilisp did not respect the value of Python's `sys.dont_write_bytecode` flag when generating bytecode (#1054)
17+
* Fix a bug where Basilisp import names existed in the same namespace as `def` names, which caused some unexpected behavior (#1045)
1718

1819
## [v0.2.2]
1920
### Added

src/basilisp/lang/compiler/analyzer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2595,6 +2595,10 @@ def _import_ast(form: ISeq, ctx: AnalyzerContext) -> Import:
25952595
"Python module alias must be a symbol", form=f
25962596
)
25972597
module_alias = module_alias_sym.name
2598+
if "." in module_alias:
2599+
raise ctx.AnalyzerException(
2600+
"Python module alias must not contain '.'", form=f
2601+
)
25982602

25992603
ctx.put_new_symbol(
26002604
module_alias_sym,

src/basilisp/lang/compiler/generator.py

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
# pylint: disable=too-many-lines
22

33
import ast
4+
import base64
45
import collections
56
import contextlib
67
import functools
8+
import hashlib
79
import logging
810
import re
911
import uuid
@@ -2275,6 +2277,43 @@ def _if_to_py_ast(ctx: GeneratorContext, node: If) -> GeneratedPyAST[ast.expr]:
22752277
)
22762278

22772279

2280+
_IMPORT_HASH_TRANSLATE_TABLE = str.maketrans({"=": "", "+": "", "/": ""})
2281+
2282+
2283+
@functools.lru_cache
2284+
def _import_hash(s: str) -> str:
2285+
"""Generate a short, consistent, hash which can be appended to imported module
2286+
names to effectively separate them from objects of the same name defined in the
2287+
module.
2288+
2289+
Aliases in Clojure exist in a separate "namespace" from interned values, but
2290+
Basilisp generates Python modules (which are essentially just a single shared
2291+
namespace), so it is possible that imported module names could clash with `def`'ed
2292+
names.
2293+
2294+
Below, we generate a truncated URL-safe Base64 representation of the MD5 hash of
2295+
the input string (typically the first '.' delimited component of the potentially
2296+
qualified module name), removing any '-' characters since those are not safe for
2297+
Python identifiers.
2298+
2299+
The hash doesn't need to be cryptographically secure, but it does need to be
2300+
consistent across sessions such that when cached namespace modules are reloaded,
2301+
the new session can find objects generated by the session which generated the
2302+
cache file. Since we are not concerned with being able to round-trip this data,
2303+
destructive modifications are not an issue."""
2304+
digest = hashlib.md5(s.encode()).digest()
2305+
return base64.b64encode(digest).decode().translate(_IMPORT_HASH_TRANSLATE_TABLE)[:6]
2306+
2307+
2308+
def _import_name(root: str, *submodules: str) -> Tuple[str, str]:
2309+
"""Return a tuple of the root import name (with hash suffix) for an import and the
2310+
full import name (if submodules are provided)."""
2311+
safe_root = f"{root}_{_import_hash(root)}"
2312+
if not submodules:
2313+
return safe_root, safe_root
2314+
return safe_root, ".".join([safe_root, *submodules])
2315+
2316+
22782317
@_with_ast_loc_deps
22792318
def _import_to_py_ast(ctx: GeneratorContext, node: Import) -> GeneratedPyAST[ast.expr]:
22802319
"""Return a Python AST node for a Basilisp `import*` expression."""
@@ -2290,10 +2329,11 @@ def _import_to_py_ast(ctx: GeneratorContext, node: Import) -> GeneratedPyAST[ast
22902329
# import if parent and child are both imported:
22912330
# (import* collections collections.abc)
22922331
if alias.alias is not None:
2293-
py_import_alias = munge(alias.alias)
2332+
py_import_alias, _ = _import_name(munge(alias.alias))
22942333
import_func = _IMPORTLIB_IMPORT_MODULE_FN_NAME
22952334
else:
2296-
py_import_alias = safe_name.split(".", maxsplit=1)[0]
2335+
py_import_alias, *submodules = safe_name.split(".", maxsplit=1)
2336+
py_import_alias, _ = _import_name(py_import_alias, *submodules)
22972337
import_func = _BUILTINS_IMPORT_FN_NAME
22982338

22992339
ctx.symbol_table.context_boundary.new_symbol(
@@ -3268,26 +3308,45 @@ def _interop_prop_to_py_ast(
32683308

32693309
@_with_ast_loc
32703310
def _maybe_class_to_py_ast(
3271-
_: GeneratorContext, node: MaybeClass
3311+
ctx: GeneratorContext, node: MaybeClass
32723312
) -> GeneratedPyAST[ast.expr]:
3273-
"""Generate a Python AST node for accessing a potential Python module
3274-
variable name."""
3313+
"""Generate a Python AST node for accessing a potential Python module variable
3314+
name."""
32753315
assert node.op == NodeOp.MAYBE_CLASS
3276-
return GeneratedPyAST(
3277-
node=ast.Name(id=_MODULE_ALIASES.get(node.class_, node.class_), ctx=ast.Load())
3278-
)
3316+
if (mod_name := _MODULE_ALIASES.get(node.class_)) is None:
3317+
current_ns = ctx.current_ns
3318+
3319+
# For imported modules only, we should generate the name reference using a
3320+
# unique, consistent hash name (just as they are imported) to avoid clashing
3321+
# with names def'ed later in the namespace.
3322+
name = sym.symbol(node.form.name)
3323+
if (alias := current_ns.import_aliases.val_at(name)) is not None:
3324+
_, mod_name = _import_name(munge(alias.name))
3325+
elif name in current_ns.imports:
3326+
root, *submodules = node.class_.split(".", maxsplit=1)
3327+
_, mod_name = _import_name(root, *submodules)
3328+
3329+
# Names which are not module references should be passed through.
3330+
if mod_name is None:
3331+
mod_name = node.class_
3332+
3333+
return GeneratedPyAST(node=ast.Name(id=mod_name, ctx=ast.Load()))
32793334

32803335

32813336
@_with_ast_loc
32823337
def _maybe_host_form_to_py_ast(
32833338
_: GeneratorContext, node: MaybeHostForm
32843339
) -> GeneratedPyAST[ast.expr]:
3285-
"""Generate a Python AST node for accessing a potential Python module
3286-
variable name with a namespace."""
3340+
"""Generate a Python AST node for accessing a potential Python module variable name
3341+
with a namespace."""
32873342
assert node.op == NodeOp.MAYBE_HOST_FORM
3288-
return GeneratedPyAST(
3289-
node=_load_attr(f"{_MODULE_ALIASES.get(node.class_, node.class_)}.{node.field}")
3290-
)
3343+
if (mod_name := _MODULE_ALIASES.get(node.class_)) is None:
3344+
# At import time, the compiler generates a unique, consistent name for the root
3345+
# level Python name to avoid clashing with names later def'ed in the namespace.
3346+
# This is the same logic applied to completing the reference.
3347+
root, *submodules = node.class_.split(".", maxsplit=1)
3348+
__, mod_name = _import_name(root, *submodules)
3349+
return GeneratedPyAST(node=_load_attr(f"{mod_name}.{node.field}"))
32913350

32923351

32933352
#########################

tests/basilisp/compiler_test.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3311,6 +3311,10 @@ def test_import_module_must_be_symbol(self, lcompile: CompileFn, code: str):
33113311
with pytest.raises(compiler.CompilerException):
33123312
lcompile(code)
33133313

3314+
def test_import_alias_may_not_contain_dot(self, lcompile: CompileFn):
3315+
with pytest.raises(compiler.CompilerException):
3316+
lcompile("(import* [re :as reg.expr])")
3317+
33143318
@pytest.mark.parametrize(
33153319
"code",
33163320
[
@@ -6225,6 +6229,48 @@ def test_aliased_namespace_not_hidden_by_python_module(
62256229
monkeypatch.chdir(cwd)
62266230
os.unlink(module_file_path)
62276231

6232+
@pytest.mark.parametrize(
6233+
"code",
6234+
[
6235+
"""
6236+
(import re)
6237+
6238+
(defn re [s]
6239+
(re/fullmatch #"[A-Z]+" s))
6240+
6241+
(re "YES")
6242+
""",
6243+
"""
6244+
(import [re :as regex])
6245+
6246+
(defn regex [s]
6247+
(regex/fullmatch #"[A-Z]+" s))
6248+
6249+
(regex "YES")
6250+
""",
6251+
],
6252+
)
6253+
def test_imports_dont_shadow_def_names(self, lcompile: CompileFn, code: str):
6254+
match = lcompile(code)
6255+
assert match is not None
6256+
6257+
def test_imports_names_resolve(self, lcompile: CompileFn):
6258+
import re
6259+
6260+
assert lcompile("(import re) re") is re
6261+
assert lcompile("(import [re :as regex]) regex") is re
6262+
6263+
import urllib.parse
6264+
6265+
assert (
6266+
lcompile("(import urllib.parse) urllib.parse/urlparse")
6267+
is urllib.parse.urlparse
6268+
)
6269+
assert (
6270+
lcompile("(import [urllib.parse :as urlparse]) urlparse/urlparse")
6271+
is urllib.parse.urlparse
6272+
)
6273+
62286274
def test_aliased_var_does_not_resolve(
62296275
self, lcompile: CompileFn, ns: runtime.Namespace
62306276
):

0 commit comments

Comments
 (0)