Skip to content

Commit 50d6f21

Browse files
committed
Try to fix jinja memory leak
1 parent 2f53e37 commit 50d6f21

File tree

2 files changed

+35
-35
lines changed

2 files changed

+35
-35
lines changed

airbyte_cdk/sources/declarative/interpolation/jinja.py

+35-34
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import ast
66
from functools import cache
7-
from typing import Any, Mapping, Optional, Tuple, Type
7+
from typing import Any, Mapping, Optional, Set, Tuple, Type
88

99
from jinja2 import meta
1010
from jinja2.environment import Template
@@ -30,6 +30,34 @@ def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
3030
return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"'
3131

3232

33+
# These aliases are used to deprecate existing keywords without breaking all existing connectors.
34+
_ALIASES = {
35+
"stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
36+
"stream_partition": "stream_slice", # Use stream_partition to access partition router's values
37+
}
38+
39+
# These extensions are not installed so they're not currently a problem,
40+
# but we're still explicitly removing them from the jinja context.
41+
# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
42+
_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
43+
44+
# By default, these Python builtin functions are available in the Jinja context.
45+
# We explicitly remove them because of the potential security risk.
46+
# Please add a unit test to test_jinja.py when adding a restriction.
47+
_RESTRICTED_BUILTIN_FUNCTIONS = [
48+
"range"
49+
] # The range function can cause very expensive computations
50+
51+
_ENVIRONMENT = StreamPartitionAccessEnvironment()
52+
_ENVIRONMENT.filters.update(**filters)
53+
_ENVIRONMENT.globals.update(**macros)
54+
55+
for extension in _RESTRICTED_EXTENSIONS:
56+
_ENVIRONMENT.extensions.pop(extension, None)
57+
for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
58+
_ENVIRONMENT.globals.pop(builtin, None)
59+
60+
3361
class JinjaInterpolation(Interpolation):
3462
"""
3563
Interpolation strategy using the Jinja2 template engine.
@@ -48,34 +76,6 @@ class JinjaInterpolation(Interpolation):
4876
Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
4977
"""
5078

51-
# These aliases are used to deprecate existing keywords without breaking all existing connectors.
52-
ALIASES = {
53-
"stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
54-
"stream_partition": "stream_slice", # Use stream_partition to access partition router's values
55-
}
56-
57-
# These extensions are not installed so they're not currently a problem,
58-
# but we're still explicitly removing them from the jinja context.
59-
# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
60-
RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
61-
62-
# By default, these Python builtin functions are available in the Jinja context.
63-
# We explicitly remove them because of the potential security risk.
64-
# Please add a unit test to test_jinja.py when adding a restriction.
65-
RESTRICTED_BUILTIN_FUNCTIONS = [
66-
"range"
67-
] # The range function can cause very expensive computations
68-
69-
def __init__(self) -> None:
70-
self._environment = StreamPartitionAccessEnvironment()
71-
self._environment.filters.update(**filters)
72-
self._environment.globals.update(**macros)
73-
74-
for extension in self.RESTRICTED_EXTENSIONS:
75-
self._environment.extensions.pop(extension, None)
76-
for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
77-
self._environment.globals.pop(builtin, None)
78-
7979
def eval(
8080
self,
8181
input_str: str,
@@ -86,7 +86,7 @@ def eval(
8686
) -> Any:
8787
context = {"config": config, **additional_parameters}
8888

89-
for alias, equivalent in self.ALIASES.items():
89+
for alias, equivalent in _ALIASES.items():
9090
if alias in context:
9191
# This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
9292
raise ValueError(
@@ -105,6 +105,7 @@ def eval(
105105
raise Exception(f"Expected a string, got {input_str}")
106106
except UndefinedError:
107107
pass
108+
108109
# If result is empty or resulted in an undefined error, evaluate and return the default string
109110
return self._literal_eval(self._eval(default, context), valid_types)
110111

@@ -132,16 +133,16 @@ def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]:
132133
return s
133134

134135
@cache
135-
def _find_undeclared_variables(self, s: Optional[str]) -> Template:
136+
def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
136137
"""
137138
Find undeclared variables and cache them
138139
"""
139-
ast = self._environment.parse(s) # type: ignore # parse is able to handle None
140+
ast = _ENVIRONMENT.parse(s) # type: ignore # parse is able to handle None
140141
return meta.find_undeclared_variables(ast)
141142

142143
@cache
143-
def _compile(self, s: Optional[str]) -> Template:
144+
def _compile(self, s: str) -> Template:
144145
"""
145146
We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
146147
"""
147-
return self._environment.from_string(s)
148+
return _ENVIRONMENT.from_string(s)

airbyte_cdk/sources/streams/http/http_client.py

-1
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,6 @@ def _write(self, key: str, value: str) -> None:
538538
logger.warning(f"Error while saving item to cache: {exception}")
539539

540540

541-
542541
class SkipFailureSQLiteCache(requests_cache.backends.sqlite.SQLiteCache):
543542
def __init__( # type: ignore # ignoring as lib is not typed
544543
self,

0 commit comments

Comments
 (0)