Skip to content

Commit 2df04bb

Browse files
committed
make CPython's str.rsplit tests pass
1 parent 28e5f35 commit 2df04bb

File tree

2 files changed

+88
-24
lines changed

2 files changed

+88
-24
lines changed

graalpython/com.oracle.graal.python.test/src/tests/test_string.py

Lines changed: 75 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -911,10 +911,10 @@ def test_startswith(self):
911911
self.checkequal(True, 'hello', 'startswith', 'o', -1)
912912
self.checkequal(True, 'hello', 'startswith', '', -3, -3)
913913
self.checkequal(False, 'hello', 'startswith', 'lo', -9)
914-
914+
915915
self.checkraises(TypeError, 'hello', 'startswith')
916916
#self.checkraises(TypeError, 'hello', 'startswith', 42)
917-
917+
918918
# test tuple arguments
919919
self.checkequal(True, 'hello', 'startswith', ('he', 'ha'))
920920
self.checkequal(False, 'hello', 'startswith', ('lo', 'llo'))
@@ -927,11 +927,82 @@ def test_startswith(self):
927927
self.checkequal(True, 'hello', 'startswith', ('lo', 'he'), 0, -1)
928928
self.checkequal(False, 'hello', 'startswith', ('he', 'hel'), 0, 1)
929929
self.checkequal(True, 'hello', 'startswith', ('he', 'hel'), 0, 2)
930-
930+
931931
self.checkraises(TypeError, 'hello', 'startswith', (42,))
932932
self.checkequal(True, 'hello', 'startswith', ('he', 42))
933933
self.checkraises(TypeError, 'hello', 'startswith', ('ne', 42,))
934934

935+
def test_rsplit(self):
936+
# by a char
937+
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|')
938+
self.checkequal(['a|b|c', 'd'], 'a|b|c|d', 'rsplit', '|', 1)
939+
self.checkequal(['a|b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 2)
940+
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 3)
941+
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|', 4)
942+
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', '|',
943+
sys.maxsize-100)
944+
self.checkequal(['a|b|c|d'], 'a|b|c|d', 'rsplit', '|', 0)
945+
self.checkequal(['a||b||c', '', 'd'], 'a||b||c||d', 'rsplit', '|', 2)
946+
self.checkequal(['abcd'], 'abcd', 'rsplit', '|')
947+
self.checkequal([''], '', 'rsplit', '|')
948+
self.checkequal(['', ' begincase'], '| begincase', 'rsplit', '|')
949+
self.checkequal(['endcase ', ''], 'endcase |', 'rsplit', '|')
950+
self.checkequal(['', 'bothcase', ''], '|bothcase|', 'rsplit', '|')
951+
952+
self.checkequal(['a\x00\x00b', 'c', 'd'], 'a\x00\x00b\x00c\x00d', 'rsplit', '\x00', 2)
953+
954+
self.checkequal(['a']*20, ('a|'*20)[:-1], 'rsplit', '|')
955+
self.checkequal(['a|a|a|a|a']+['a']*15,
956+
('a|'*20)[:-1], 'rsplit', '|', 15)
957+
958+
# by string
959+
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//')
960+
self.checkequal(['a//b//c', 'd'], 'a//b//c//d', 'rsplit', '//', 1)
961+
self.checkequal(['a//b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 2)
962+
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 3)
963+
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//', 4)
964+
self.checkequal(['a', 'b', 'c', 'd'], 'a//b//c//d', 'rsplit', '//',
965+
sys.maxsize-5)
966+
self.checkequal(['a//b//c//d'], 'a//b//c//d', 'rsplit', '//', 0)
967+
self.checkequal(['a////b////c', '', 'd'], 'a////b////c////d', 'rsplit', '//', 2)
968+
self.checkequal(['', ' begincase'], 'test begincase', 'rsplit', 'test')
969+
self.checkequal(['endcase ', ''], 'endcase test', 'rsplit', 'test')
970+
self.checkequal(['', ' bothcase ', ''], 'test bothcase test',
971+
'rsplit', 'test')
972+
self.checkequal(['ab', 'c'], 'abbbc', 'rsplit', 'bb')
973+
self.checkequal(['', ''], 'aaa', 'rsplit', 'aaa')
974+
self.checkequal(['aaa'], 'aaa', 'rsplit', 'aaa', 0)
975+
self.checkequal(['ab', 'ab'], 'abbaab', 'rsplit', 'ba')
976+
self.checkequal(['aaaa'], 'aaaa', 'rsplit', 'aab')
977+
self.checkequal([''], '', 'rsplit', 'aaa')
978+
self.checkequal(['aa'], 'aa', 'rsplit', 'aaa')
979+
self.checkequal(['bbob', 'A'], 'bbobbbobbA', 'rsplit', 'bbobb')
980+
self.checkequal(['', 'B', 'A'], 'bbobbBbbobbA', 'rsplit', 'bbobb')
981+
982+
self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH')
983+
self.checkequal(['a']*20, ('aBLAH'*20)[:-4], 'rsplit', 'BLAH', 19)
984+
self.checkequal(['aBLAHa'] + ['a']*18, ('aBLAH'*20)[:-4],
985+
'rsplit', 'BLAH', 18)
986+
987+
# with keyword args
988+
self.checkequal(['a', 'b', 'c', 'd'], 'a|b|c|d', 'rsplit', sep='|')
989+
self.checkequal(['a|b|c', 'd'],
990+
'a|b|c|d', 'rsplit', '|', maxsplit=1)
991+
self.checkequal(['a|b|c', 'd'],
992+
'a|b|c|d', 'rsplit', sep='|', maxsplit=1)
993+
self.checkequal(['a|b|c', 'd'],
994+
'a|b|c|d', 'rsplit', maxsplit=1, sep='|')
995+
self.checkequal(['a b c', 'd'],
996+
'a b c d', 'rsplit', maxsplit=1)
997+
998+
# argument type
999+
self.checkraises(TypeError, 'hello', 'rsplit', 42, 42, 42)
1000+
1001+
# null case
1002+
self.checkraises(ValueError, 'hello', 'rsplit', '')
1003+
self.checkraises(ValueError, 'hello', 'rsplit', '', 0)
1004+
1005+
9351006
def test_same_id():
9361007
empty_ids = set([id(str()) for i in range(100)])
9371008
assert len(empty_ids) == 1
@@ -957,7 +1028,7 @@ def test_translate_from_byte_table():
9571028
assert "ahoj".translate(table) == "AHOJ"
9581029
assert "ahoj".translate(bytearray(table)) == "AHOJ"
9591030
assert "ahoj".translate(memoryview(table)) == "AHOJ"
960-
1031+
9611032
def test_tranlslate_from_short_table():
9621033
table = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`ABCDEFGH'
9631034
assert "ahoj".translate(table) == "AHoj"
@@ -983,4 +1054,3 @@ def test_literals():
9831054
assert "hello\[world\]"[6] == "["
9841055
assert "hello\[world\]"[12] == "\\"
9851056
assert "hello\[world\]"[13] == "]"
986-

graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/objects/str/StringBuiltins.java

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,7 @@ private PList splitfields(String s, int maxsplit) {
11741174
}
11751175

11761176
// str.rsplit
1177-
@Builtin(name = "rsplit", maxNumOfPositionalArgs = 3)
1177+
@Builtin(name = "rsplit", fixedNumOfPositionalArgs = 1, keywordArguments = {"sep", "maxsplit"})
11781178
@GenerateNodeFactory
11791179
@TypeSystemReference(PythonArithmeticTypes.class)
11801180
public abstract static class RSplitNode extends SplitBaseNode {
@@ -1185,25 +1185,21 @@ public PList doSplit(String self, PNone sep, PNone maxsplit) {
11851185
return rsplitfields(self, -1);
11861186
}
11871187

1188-
@SuppressWarnings("unused")
1189-
@TruffleBoundary
11901188
@Specialization
1191-
public PList doSplit(String self, String sep, PNone maxsplit) {
1192-
PList list = factory().createList();
1193-
String[] strs = self.split(Pattern.quote(sep));
1194-
for (String s : strs) {
1195-
getAppendNode().execute(list, s);
1196-
}
1197-
return list;
1189+
public PList doSplit(String self, String sep, @SuppressWarnings("unused") PNone maxsplit) {
1190+
return doSplit(self, sep, Integer.MAX_VALUE);
11981191
}
11991192

12001193
@Specialization
12011194
public PList doSplit(String self, String sep, int maxsplit) {
1195+
if (sep.length() == 0) {
1196+
throw raise(ValueError, "empty separator");
1197+
}
12021198
PList list = factory().createList();
12031199
int splits = 0;
12041200
int end = self.length();
12051201
String remainder = self;
1206-
int sepLength = Math.max(1, sep.length());
1202+
int sepLength = sep.length();
12071203
while (splits < maxsplit) {
12081204
int idx = remainder.lastIndexOf(sep);
12091205

@@ -1217,10 +1213,7 @@ public PList doSplit(String self, String sep, int maxsplit) {
12171213
remainder = remainder.substring(0, end);
12181214
}
12191215

1220-
if (!remainder.isEmpty()) {
1221-
getAppendNode().execute(list, remainder);
1222-
}
1223-
1216+
getAppendNode().execute(list, remainder);
12241217
getReverseNode().execute(list);
12251218
return list;
12261219
}
@@ -1277,18 +1270,19 @@ private PList rsplitfields(String s, int maxsplit) {
12771270
// The next segment runs back to the preceding whitespace or the start of the string
12781271
for (index = end; index >= 0; index--) {
12791272
if (isWhitespace(s.codePointAt(index))) {
1280-
// Break leaving index pointing at whitespace
1273+
// Break leaving index pointing after the found whitespace
1274+
index++;
12811275
break;
12821276
}
12831277
}
12841278
}
12851279

12861280
// Make a piece from start up to index
1287-
getAppendNode().execute(list, s.substring(index + 1, end + 1));
1281+
getAppendNode().execute(list, s.substring(index, end + 1));
12881282
splits++;
12891283

1290-
// Start next segment search at that point
1291-
end = index;
1284+
// Start next segment search at the whitespace
1285+
end = index - 1;
12921286
}
12931287

12941288
getReverseNode().execute(list);

0 commit comments

Comments
 (0)