Skip to content

Commit 3336d08

Browse files
fbx31fbx31
and
fbx31
authored
Add new functions to Series and Expressions, some docs update (#319)
New functions in src\lazy\dsl.rs - add implode, str_strip_chars, str_starts_with, str_ends_with - add expression support in str_replace and str_replace_all - add struct_field_by_index, struct_with_fields Update documentation for Series in polars/series/* - replaced ListNamespace with SeriesListFunctions (ListSeries in doc) and StringNamespace with SeriesStringFunctions (StringSeries in doc) - renamed SeriesDateFunctions to DatetimeSeries and SeriesStructFunctions to StructSeries _Unfortunately, Namespaces are not consistent between Series and Lazy with the current polars sources (or I did not manage to align them), so I am not sure that the Namespaces in shared_traits are very useful._ Update implementation and documentation for Expr in polars/lazy/expr/* - convert ExprDateTime from type to interface and ExprList from type to interface - add nth and withFields in interface ExprStruct - for strings: change signatures of contains, extract, replace and replaceAll to support Expressions, new functions endsWith, startsWith, stripChars, stripCharsEnd, stripCharsStart Add new tests - in __tests__/series.test.ts for "str contains" and "struct:nth" - in __tests__/expr.test.ts for "expr:implode", "str:endsWith", "str:startsWith", "str:replace", "str:replaceAll", "struct:nth", "struct:withFields", "str:stripChars" _2 newly added tests are still skipped with the comment "// TODO: Remove skip when polars-plan will support for "dynamic pattern length in 'str.replace' expressions""_ Lint on rust (cargo fmt) and typescript (biome) OK Doc generation OK : typedoc --> [warning] Found 0 errors and 358 warnings Tests passed Test Suites: 14 passed, 14 total Tests: 3 skipped, 3 todo, 1052 passed, 1058 total Snapshots: 0 total Time: 26.468 s, estimated 30 s Ran all test suites. 
Configuration used - Windows (sorry in advance for the CR/LF burden) - nodejs v22.11.0 - npm v10.9.0 (no use of yarn) - biome v1.9.4 - typedoc v0.27.9 - typescript v5.7.2 - cargo/rustc v1.85.0-nightly --------- Co-authored-by: fbx31 <[email protected]>
1 parent dbc164d commit 3336d08

File tree

13 files changed

+820
-144
lines changed

13 files changed

+820
-144
lines changed

__tests__/expr.test.ts

+265-2
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,18 @@ describe("expr", () => {
261261
const actual = df.select(col("nrs").explode());
262262
expect(actual).toFrameEqual(expected);
263263
});
264+
test("implode", () => {
265+
const df = pl.DataFrame({
266+
nrs: [1, 2, 1, 3],
267+
strs: ["a", "b", null, "d"],
268+
});
269+
const expected = pl.DataFrame({
270+
nrs: [[1, 2, 1, 3]],
271+
strs: [["a", "b", null, "d"]],
272+
});
273+
const actual = df.select(col("nrs").implode(), col("strs").implode());
274+
expect(actual).toFrameEqual(expected);
275+
});
264276
test("extend", () => {
265277
const df = pl.DataFrame({
266278
a: [1, 2, 3, 4, 5],
@@ -1075,6 +1087,56 @@ describe("expr.str", () => {
10751087
);
10761088
expect(actual).toFrameEqual(expected);
10771089
});
1090+
test("endsWith", () => {
1091+
const df = pl.DataFrame({
1092+
fruits: ["apple", "mango", null],
1093+
});
1094+
const expected = df.withColumn(
1095+
pl.Series("has_suffix", [false, true, null], pl.Bool),
1096+
);
1097+
const actual = df.withColumn(
1098+
col("fruits").str.endsWith("go").as("has_suffix"),
1099+
);
1100+
expect(actual).toFrameEqual(expected);
1101+
});
1102+
test("endsWith:expr", () => {
1103+
const df = pl.DataFrame({
1104+
fruits: ["apple", "mango", "banana"],
1105+
suffix: ["le", "go", "nu"],
1106+
});
1107+
const expected = df.withColumn(
1108+
pl.Series("has_suffix", [true, true, false], pl.Bool),
1109+
);
1110+
const actual = df.withColumn(
1111+
col("fruits").str.endsWith(pl.col("suffix")).as("has_suffix"),
1112+
);
1113+
expect(actual).toFrameEqual(expected);
1114+
});
1115+
test("startsWith", () => {
1116+
const df = pl.DataFrame({
1117+
fruits: ["apple", "mango", null],
1118+
});
1119+
const expected = df.withColumn(
1120+
pl.Series("has_prefix", [true, false, null], pl.Bool),
1121+
);
1122+
const actual = df.withColumn(
1123+
col("fruits").str.startsWith("app").as("has_prefix"),
1124+
);
1125+
expect(actual).toFrameEqual(expected);
1126+
});
1127+
test("startsWith:expr", () => {
1128+
const df = pl.DataFrame({
1129+
fruits: ["apple", "mango", "banana"],
1130+
prefix: ["app", "na", "ba"],
1131+
});
1132+
const expected = df.withColumn(
1133+
pl.Series("has_prefix", [true, false, true], pl.Bool),
1134+
);
1135+
const actual = df.withColumn(
1136+
col("fruits").str.startsWith(pl.col("prefix")).as("has_prefix"),
1137+
);
1138+
expect(actual).toFrameEqual(expected);
1139+
});
10781140
test("split", () => {
10791141
const df = pl.DataFrame({ a: ["ab,cd", "e,fg", "h"] });
10801142
const expected = pl.DataFrame({
@@ -1196,10 +1258,10 @@ describe("expr.str", () => {
11961258
});
11971259
test("str.replace", () => {
11981260
const df = pl.DataFrame({
1199-
os: ["kali-linux", "debian-linux", "ubuntu-linux", "mac-sierra"],
1261+
os: ["kali-linux", "debian-linux", null, "mac-sierra"],
12001262
});
12011263
const expected = pl.DataFrame({
1202-
os: ["kali:linux", "debian:linux", "ubuntu:linux", "mac:sierra"],
1264+
os: ["kali:linux", "debian:linux", null, "mac:sierra"],
12031265
});
12041266
const seriesActual = df
12051267
.getColumn("os")
@@ -1210,6 +1272,48 @@ describe("expr.str", () => {
12101272
expect(actual).toFrameEqual(expected);
12111273
expect(seriesActual).toFrameEqual(expected);
12121274
});
1275+
test("str.replace:Expr1", () => {
1276+
const df = pl.DataFrame({
1277+
os: ["kali-linux", "debian-linux", null, "mac-sierra"],
1278+
val: ["windows", "acorn", "atari", null],
1279+
});
1280+
const expected = pl.DataFrame({
1281+
os: ["kali-windows", "debian-acorn", null, null],
1282+
});
1283+
const actual = df.select(
1284+
col("os").str.replace("linux", col("val")).as("os"),
1285+
);
1286+
expect(actual).toFrameEqual(expected);
1287+
});
1288+
test("str.replace:Expr2", () => {
1289+
const df = pl.DataFrame({
1290+
cost: ["#12.34", "#56.78"],
1291+
text: ["123abc", "abc456"],
1292+
});
1293+
const expected = pl.DataFrame({
1294+
expr: ["123#12.34", "#56.78456"],
1295+
});
1296+
const actual = df.select(
1297+
col("text").str.replace("abc", pl.col("cost")).alias("expr"),
1298+
);
1299+
expect(actual).toFrameEqual(expected);
1300+
});
1301+
// TODO: Remove skip when polars-plan will support for "dynamic pattern length in 'str.replace' expressions"
1302+
test.skip("str.replace:Expr3", () => {
1303+
const df = pl.DataFrame({
1304+
os: ["kali-linux", "debian-linux", "ubuntu-linux", "mac-sierra"],
1305+
pat: ["linux", "linux", "linux", "mac"],
1306+
val: ["windows", "acorn", "atari", "arm"],
1307+
});
1308+
const expected = pl.DataFrame({
1309+
os: ["kali-windows", "debian-acorn", "ubuntu-atari", "arm-sierra"],
1310+
});
1311+
const actual = df.select(
1312+
col("os").str.replace(col("pat"), col("val")).as("os"),
1313+
);
1314+
expect(actual).toFrameEqual(expected);
1315+
});
1316+
12131317
test("str.replaceAll", () => {
12141318
const df = pl.DataFrame({
12151319
os: [
@@ -1236,6 +1340,108 @@ describe("expr.str", () => {
12361340
expect(actual).toFrameEqual(expected);
12371341
expect(seriesActual).toFrameEqual(expected);
12381342
});
1343+
test("str.replaceAll:Expr", () => {
1344+
const df = pl.DataFrame({
1345+
os: [
1346+
"kali-linux-2021.3a",
1347+
null,
1348+
"ubuntu-linux-16.04",
1349+
"mac-sierra-10.12.1",
1350+
],
1351+
val: [":", ":", null, "_"],
1352+
});
1353+
const expected = pl.DataFrame({
1354+
os: ["kali:linux:2021.3a", null, null, "mac_sierra_10.12.1"],
1355+
});
1356+
const actual = df.select(
1357+
col("os").str.replaceAll("-", col("val")).as("os"),
1358+
);
1359+
expect(actual).toFrameEqual(expected);
1360+
});
1361+
// TODO: Remove skip when polars-plan will support for "dynamic pattern length in 'str.replace' expressions"
1362+
test.skip("str.replaceAll:Expr2", () => {
1363+
const df = pl.DataFrame({
1364+
os: [
1365+
"kali-linux-2021.3a",
1366+
null,
1367+
"ubuntu-linux-16.04",
1368+
"mac-sierra-10.12.1",
1369+
],
1370+
pat: ["-", "-", "-", "."],
1371+
val: [":", ":", null, "_"],
1372+
});
1373+
const expected = pl.DataFrame({
1374+
os: ["kali:linux:2021.3a", null, null, "mac-sierra-10_12_1"],
1375+
});
1376+
const actual = df.select(
1377+
col("os").str.replaceAll(col("pat"), col("val")).as("os"),
1378+
);
1379+
expect(actual).toFrameEqual(expected);
1380+
});
1381+
test("struct:field", () => {
1382+
const df = pl.DataFrame({
1383+
objs: [
1384+
{ a: 1, b: 2.0, c: "abc" },
1385+
{ a: 10, b: 20.0, c: "def" },
1386+
],
1387+
});
1388+
const expected = pl.DataFrame({
1389+
b: [2.0, 20.0],
1390+
last: ["abc", "def"],
1391+
});
1392+
const actual = df.select(
1393+
col("objs").struct.field("b"),
1394+
col("objs").struct.field("c").as("last"),
1395+
);
1396+
expect(actual).toFrameStrictEqual(expected);
1397+
});
1398+
test("struct:nth", () => {
1399+
const df = pl.DataFrame({
1400+
objs: [
1401+
{ a: 1, b: 2.0, c: "abc" },
1402+
{ a: 10, b: 20.0, c: "def" },
1403+
],
1404+
});
1405+
const expected = pl.DataFrame({
1406+
b: [2.0, 20.0],
1407+
last: ["abc", "def"],
1408+
});
1409+
const actual = df.select(
1410+
col("objs").struct.nth(1),
1411+
col("objs").struct.nth(2).as("last"),
1412+
);
1413+
expect(actual).toFrameStrictEqual(expected);
1414+
});
1415+
test("struct:withFields", () => {
1416+
const df = pl.DataFrame({
1417+
objs: [
1418+
{ a: 1, b: 2.0, c: "abc" },
1419+
{ a: 10, b: 20.0, c: "def" },
1420+
],
1421+
more: ["text1", "text2"],
1422+
final: [100, null],
1423+
});
1424+
const expected = pl.DataFrame({
1425+
objs: [
1426+
{ a: 1, b: 2.0, c: "abc", d: null, e: "text" },
1427+
{ a: 10, b: 20.0, c: "def", d: null, e: "text" },
1428+
],
1429+
new: [
1430+
{ a: 1, b: 2.0, c: "abc", more: "text1", final: 100 },
1431+
{ a: 10, b: 20.0, c: "def", more: "text2", final: null },
1432+
],
1433+
});
1434+
const actual = df.select(
1435+
col("objs").struct.withFields([
1436+
pl.lit(null).alias("d"),
1437+
pl.lit("text").alias("e"),
1438+
]),
1439+
col("objs")
1440+
.struct.withFields([col("more"), col("final")])
1441+
.alias("new"),
1442+
);
1443+
expect(actual).toFrameStrictEqual(expected);
1444+
});
12391445
test("expr.replace", () => {
12401446
const df = pl.DataFrame({ a: [1, 2, 2, 3], b: ["a", "b", "c", "d"] });
12411447
{
@@ -1443,6 +1649,63 @@ describe("expr.str", () => {
14431649
expect(actual).toFrameEqual(expected);
14441650
expect(seriesActual).toFrameEqual(expected);
14451651
});
1652+
1653+
test("stripChars:Expr", () => {
1654+
const df = pl.DataFrame({
1655+
os: [
1656+
"#Kali-Linux###",
1657+
"$$$Debian-Linux$",
1658+
null,
1659+
"Ubuntu-Linux ",
1660+
" Mac-Sierra",
1661+
],
1662+
chars: ["#", "$", " ", " ", null],
1663+
});
1664+
const expected = pl.DataFrame({
1665+
os: ["Kali-Linux", "Debian-Linux", null, "Ubuntu-Linux", "Mac-Sierra"],
1666+
});
1667+
const actual = df.select(col("os").str.stripChars(col("chars")).as("os"));
1668+
expect(actual).toFrameEqual(expected);
1669+
});
1670+
test("stripCharsStart:Expr", () => {
1671+
const df = pl.DataFrame({
1672+
os: [
1673+
"#Kali-Linux###",
1674+
"$$$Debian-Linux$",
1675+
null,
1676+
" Ubuntu-Linux ",
1677+
"Mac-Sierra",
1678+
],
1679+
chars: ["#", "$", " ", null, "Mac-"],
1680+
});
1681+
const expected = pl.DataFrame({
1682+
os: ["Kali-Linux###", "Debian-Linux$", null, "Ubuntu-Linux ", "Sierra"],
1683+
});
1684+
const actual = df.select(
1685+
col("os").str.stripCharsStart(col("chars")).as("os"),
1686+
);
1687+
expect(actual).toFrameEqual(expected);
1688+
});
1689+
test("stripCharsEnd:Expr", () => {
1690+
const df = pl.DataFrame({
1691+
os: [
1692+
"#Kali-Linux###",
1693+
"$$$Debian-Linux$",
1694+
null,
1695+
"Ubuntu-Linux ",
1696+
" Mac-Sierra",
1697+
],
1698+
chars: ["#", "$", " ", null, "-Sierra"],
1699+
});
1700+
const expected = pl.DataFrame({
1701+
os: ["#Kali-Linux", "$$$Debian-Linux", null, "Ubuntu-Linux", " Mac"],
1702+
});
1703+
const actual = df.select(
1704+
col("os").str.stripCharsEnd(col("chars")).as("os"),
1705+
);
1706+
expect(actual).toFrameEqual(expected);
1707+
});
1708+
14461709
test("padStart", () => {
14471710
const df = pl.DataFrame({
14481711
foo: ["a", "b", "cow", "longer"],

__tests__/series.test.ts

+16
Original file line numberDiff line numberDiff line change
@@ -871,6 +871,12 @@ describe("StringFunctions", () => {
871871
const serString = s.toString();
872872
expect(actualInspect).toStrictEqual(serString);
873873
});
874+
test("str contains", () => {
875+
const s = pl.Series(["linux-kali", "linux-debian", "windows-vista"]);
876+
const expected = pl.Series([true, true, false]);
877+
const encoded = s.str.contains("linux");
878+
expect(encoded).toSeriesEqual(expected);
879+
});
874880
});
875881
describe("series struct", () => {
876882
test("struct:fields", () => {
@@ -904,6 +910,16 @@ describe("series struct", () => {
904910
.toArray();
905911
expect(actual).toEqual(expected);
906912
});
913+
test("struct:nth", () => {
914+
const arr = [
915+
{ foo: 1, bar: 2, ham: "c" },
916+
{ foo: null, bar: 10, ham: null },
917+
{ foo: 2, bar: 0, ham: "z" },
918+
];
919+
const expected = [1, null, 2];
920+
const actual = pl.Series(arr).struct.nth(0).toArray();
921+
expect(actual).toEqual(expected);
922+
});
907923
});
908924
describe("generics", () => {
909925
const series = pl.Series([1, 2, 3]);

polars/lazy/expr/datetime.ts

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import type { DateFunctions } from "../../shared_traits";
22
import { type Expr, _Expr } from "../expr";
33

44
/**
5-
* DateTime functions
5+
* DateTime functions for Lazy dataframes
66
*/
7-
export type ExprDateTime = DateFunctions<Expr>;
7+
export interface ExprDateTime extends DateFunctions<Expr> {}
88

99
export const ExprDateTimeFunctions = (_expr: any): ExprDateTime => {
1010
const wrap = (method, ...args: any[]): Expr => {

polars/lazy/expr/index.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import * as dt from "./datetime";
22
import * as lst from "./list";
33
import * as str from "./string";
44
import * as struct from "./struct";
5-
export type { StringNamespace } from "./string";
5+
export type { ExprString as StringNamespace } from "./string";
66
export type { ExprList as ListNamespace } from "./list";
77
export type { ExprDateTime as DatetimeNamespace } from "./datetime";
88
export type { ExprStruct as StructNamespace } from "./struct";
@@ -53,7 +53,7 @@ export interface Expr
5353
/**
5454
* String namespace
5555
*/
56-
get str(): str.StringNamespace;
56+
get str(): str.ExprString;
5757
/**
5858
* List namespace
5959
*/
@@ -471,6 +471,7 @@ export interface Expr
471471
/** Take the first n values. */
472472
head(length?: number): Expr;
473473
head({ length }: { length: number }): Expr;
474+
implode(): Expr;
474475
inner(): any;
475476
/** Interpolate intermediate values. The interpolation method is linear. */
476477
interpolate(): Expr;
@@ -1528,6 +1529,9 @@ export const _Expr = (_expr: any): Expr => {
15281529

15291530
return wrap("head", length.length);
15301531
},
1532+
implode() {
1533+
return _Expr(_expr.implode());
1534+
},
15311535
interpolate(method: InterpolationMethod = "linear") {
15321536
return _Expr(_expr.interpolate(method));
15331537
},

0 commit comments

Comments
 (0)