Skip to content

Commit 5c7920a

Browse files
committed
[GR-11670] Implement support for 're.split'.
PullRequest: graalpython/194
2 parents 8bd62c1 + a3fadff commit 5c7920a

File tree

7 files changed

+160
-63
lines changed

7 files changed

+160
-63
lines changed

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/GraalPythonEnvVars.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,11 +57,13 @@ private static String discoverHomeFromSource() throws IOException {
5757
final Path codeDir = codeLocation.getParent();
5858

5959
// executing from jar file in source tree
60-
if (codeDir.endsWith(Paths.get("mxbuild", "dists"))) {
61-
final Path candidate = codeDir.getParent().getParent().resolve("graalpython");
62-
if (isGraalPythonHome(candidate)) {
63-
// Jar source build
64-
return candidate.toFile().getCanonicalPath().toString();
60+
for (Path cur = codeDir; cur.getNameCount() >= 2; cur = cur.getParent()) {
61+
if (cur.endsWith(Paths.get("mxbuild", "dists"))) {
62+
final Path candidate = cur.getParent().getParent().resolve("graalpython");
63+
if (isGraalPythonHome(candidate)) {
64+
// Jar source build
65+
return candidate.toFile().getCanonicalPath().toString();
66+
}
6567
}
6668
}
6769

graalpython/com.oracle.graal.python.test/src/com/oracle/graal/python/test/builtin/RegularExpressionTests.java

Lines changed: 0 additions & 53 deletions
This file was deleted.

graalpython/com.oracle.graal.python.test/src/tests/test_assign.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def varfunc():
5858
assert 'x' in var
5959
assert var['x'] == 10
6060

61+
6162
global_var = {}
6263

6364

@@ -70,3 +71,17 @@ def test_assign_nonlocal_func_return():
7071

7172
assert 'x' in global_var
7273
assert global_var['x'] == 10
74+
75+
76+
def test_destructuring():
77+
a, b = (1, 2)
78+
assert a == 1 and b == 2
79+
80+
a, b, c = "\xe0\xdf\xe7"
81+
assert a == "à" and b == "ß" and c == "ç"
82+
83+
a, b, c = "\u0430\u0431\u0432"
84+
assert a == 'а' and b == 'б' and c == 'в'
85+
# TODO not supported yet
86+
# a, b, c = "\U0001d49c\U0001d49e\U0001d4b5"
87+
# assert a == '𝒜' and b == '𝒞' and c == '𝒵'

graalpython/com.oracle.graal.python.test/src/tests/test_re.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,86 @@ def test_symbolic_groups(self):
244244
self.checkPatternError(r'(?(-1))', "bad character in group name '-1'", 3)
245245
self.assertEqual(re.match(pat, 'xc8yz').span(), (0, 5))
246246

247+
def test_re_split(self):
248+
for string in ":a:b::c", S(":a:b::c"):
249+
self.assertTypedEqual(re.split(":", string),
250+
['', 'a', 'b', '', 'c'])
251+
self.assertTypedEqual(re.split(":+", string),
252+
['', 'a', 'b', 'c'])
253+
self.assertTypedEqual(re.split("(:+)", string),
254+
['', ':', 'a', ':', 'b', '::', 'c'])
255+
for string in (b":a:b::c", B(b":a:b::c"), bytearray(b":a:b::c"),
256+
memoryview(b":a:b::c")):
257+
self.assertTypedEqual(re.split(b":", string),
258+
[b'', b'a', b'b', b'', b'c'])
259+
self.assertTypedEqual(re.split(b":+", string),
260+
[b'', b'a', b'b', b'c'])
261+
self.assertTypedEqual(re.split(b"(:+)", string),
262+
[b'', b':', b'a', b':', b'b', b'::', b'c'])
263+
# TODO not supported yet
264+
# for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432", "\U0001d49c\U0001d49e\U0001d4b5"):
265+
for a, b, c in ("\xe0\xdf\xe7", "\u0430\u0431\u0432"):
266+
string = ":%s:%s::%s" % (a, b, c)
267+
self.assertEqual(re.split(":", string), ['', a, b, '', c])
268+
self.assertEqual(re.split(":+", string), ['', a, b, c])
269+
self.assertEqual(re.split("(:+)", string),
270+
['', ':', a, ':', b, '::', c])
271+
272+
self.assertEqual(re.split("(?::+)", ":a:b::c"), ['', 'a', 'b', 'c'])
273+
self.assertEqual(re.split("(:)+", ":a:b::c"),
274+
['', ':', 'a', ':', 'b', ':', 'c'])
275+
self.assertEqual(re.split("([b:]+)", ":a:b::c"),
276+
['', ':', 'a', ':b::', 'c'])
277+
self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
278+
['', None, ':', 'a', None, ':', '', 'b', None, '',
279+
None, '::', 'c'])
280+
self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
281+
['', 'a', '', '', 'c'])
282+
283+
# TODO subtests not supported yet
284+
# for sep, expected in [
285+
# (':*', ['', 'a', 'b', 'c']),
286+
# ('(?::*)', ['', 'a', 'b', 'c']),
287+
# ('(:*)', ['', ':', 'a', ':', 'b', '::', 'c']),
288+
# ('(:)*', ['', ':', 'a', ':', 'b', ':', 'c']),
289+
# ]:
290+
# with self.subTest(sep=sep), self.assertWarns(FutureWarning):
291+
# self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
292+
# for sep, expected in [
293+
# ('', [':a:b::c']),
294+
# (r'\b', [':a:b::c']),
295+
# (r'(?=:)', [':a:b::c']),
296+
# (r'(?<=:)', [':a:b::c']),
297+
# ]:
298+
# with self.subTest(sep=sep), self.assertRaises(ValueError):
299+
# self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)
300+
301+
def test_re_findall(self):
302+
self.assertEqual(re.findall(":+", "abc"), [])
303+
for string in "a:b::c:::d", S("a:b::c:::d"):
304+
self.assertTypedEqual(re.findall(":+", string),
305+
[":", "::", ":::"])
306+
self.assertTypedEqual(re.findall("(:+)", string),
307+
[":", "::", ":::"])
308+
self.assertTypedEqual(re.findall("(:)(:*)", string),
309+
[(":", ""), (":", ":"), (":", "::")])
310+
for string in (b"a:b::c:::d", B(b"a:b::c:::d"), bytearray(b"a:b::c:::d"),
311+
memoryview(b"a:b::c:::d")):
312+
self.assertTypedEqual(re.findall(b":+", string),
313+
[b":", b"::", b":::"])
314+
self.assertTypedEqual(re.findall(b"(:+)", string),
315+
[b":", b"::", b":::"])
316+
self.assertTypedEqual(re.findall(b"(:)(:*)", string),
317+
[(b":", b""), (b":", b":"), (b":", b"::")])
318+
for x in ("\xe0", "\u0430", "\U0001d49c"):
319+
xx = x * 2
320+
xxx = x * 3
321+
string = "a%sb%sc%sd" % (x, xx, xxx)
322+
self.assertEqual(re.findall("%s+" % x, string), [x, xx, xxx])
323+
self.assertEqual(re.findall("(%s+)" % x, string), [x, xx, xxx])
324+
self.assertEqual(re.findall("(%s)(%s*)" % (x, x), string),
325+
[(x, ""), (x, x), (x, xx)])
326+
247327
def test_ignore_case_set(self):
248328
self.assertTrue(re.match(r'[19A]', 'A', re.I))
249329
self.assertTrue(re.match(r'[19a]', 'a', re.I))

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ protected List<com.oracle.truffle.api.dsl.NodeFactory<? extends PythonBuiltinBas
111111
public abstract static class StrNode extends PythonUnaryBuiltinNode {
112112
@Specialization
113113
public Object str(PString self) {
114-
return self;
114+
return self.getValue();
115115
}
116116

117117
@Specialization

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/parser/PythonTreeTranslator.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,26 @@ private static String unescapeJavaString(String st) {
730730
sb.append(Character.toChars(code));
731731
i += 5;
732732
continue;
733+
// Hex Unicode: U????????
734+
case 'U':
735+
if (i >= st.length() - 9) {
736+
ch = 'U';
737+
break;
738+
}
739+
code = Integer.parseInt(st.substring(i + 2, i + 10), 16);
740+
sb.append(Character.toChars(code));
741+
i += 9;
742+
continue;
743+
// Hex Unicode: x??
744+
case 'x':
745+
if (i >= st.length() - 3) {
746+
ch = 'u';
747+
break;
748+
}
749+
int hexCode = Integer.parseInt("" + st.charAt(i + 2) + st.charAt(i + 3), 16);
750+
sb.append(Character.toChars(hexCode));
751+
i += 3;
752+
continue;
733753
}
734754
i++;
735755
}

graalpython/lib-graalpython/_sre.py

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,10 +255,13 @@ def findall(self, string, pos=0, endpos=-1):
255255
result = tregex_call_safe(pattern.exec, string, pos)
256256
if not result.isMatch:
257257
break
258-
elif self.num_groups == 0:
259-
matchlist.append("")
260-
elif self.num_groups == 1:
261-
matchlist.append(string[result.start[1]:result.end[1]])
258+
elif result.groupCount == 0:
259+
assert False, "inconsistent regex result"
260+
matchlist.append('')
261+
elif result.groupCount == 1:
262+
matchlist.append(str(string[result.start[0]:result.end[0]]))
263+
elif result.groupCount == 2:
264+
matchlist.append(str(string[result.start[1]:result.end[1]]))
262265
else:
263266
matchlist.append(SRE_Match(self, pos, endpos, result).groups())
264267
no_progress = (result.start[0] == result.end[0])
@@ -355,6 +358,36 @@ def sub(self, repl, string, count=0):
355358
return self.__compile_cpython_sre().sub(repl, string, count)
356359

357360

361+
def split(self, string, maxsplit=0):
362+
n = 0
363+
try:
364+
pattern = self.__tregex_compile(self.pattern)
365+
result = []
366+
pos = 0
367+
progress = True
368+
while (maxsplit == 0 or n < maxsplit) and pos <= len(string) and progress:
369+
match_result = tregex_call_safe(pattern.exec, string, pos)
370+
if not match_result.isMatch:
371+
break
372+
n += 1
373+
start = match_result.start[0]
374+
end = match_result.end[0]
375+
result.append(str(string[pos:start]))
376+
# add all group strings
377+
for i in range(match_result.groupCount-1):
378+
groupStart = match_result.start[i + 1]
379+
if groupStart >= 0:
380+
result.append(str(string[groupStart:match_result.end[i+1]]))
381+
else:
382+
result.append(None)
383+
pos = end
384+
progress = (start != end)
385+
result.append(str(string[pos:]))
386+
return result
387+
except BaseException:
388+
return self.__compile_cpython_sre().split(string, maxsplit)
389+
390+
358391
compile = SRE_Pattern
359392

360393

0 commit comments

Comments
 (0)