Skip to content

Commit ca8b7b8

Browse files
authored
Feature/add str padding methods (pola-rs#41)
* Added option interface to io functions. * Made interfaces PascalCase * Fixed Errors from tests Two tests failing - don't know why * Added padStart, padEnd and justify to lazy str. * Edited jsdoc * Added example * Edit on example * Reformat arguments Reformat justify to zfill * Corrected test to reformat * Added padStart, padEnd and zFill to series * Removed dtype-struct from features * Removed todo * Fixed error * Deleted this * Code formatting
1 parent 1a7cef0 commit ca8b7b8

File tree

7 files changed

+345
-10
lines changed

7 files changed

+345
-10
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ features = [
6666
"arange",
6767
"true_div",
6868
"dtype-categorical",
69+
"string_justify",
6970
"diagonal_concat",
7071
"horizontal_concat",
7172
"abs",

__tests__/expr.test.ts

+48
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,54 @@ describe("expr.str", () => {
11901190
expect(actual).toFrameEqual(expected);
11911191
expect(seriesActual).toFrameEqual(expected);
11921192
});
1193+
test("padStart", () => {
1194+
const df = pl.DataFrame({
1195+
foo: ["a", "b", "cow", "longer"],
1196+
});
1197+
const expected = pl.DataFrame({
1198+
foo: ["__a", "__b", "cow", "longer"],
1199+
});
1200+
const seriesActual = df
1201+
.getColumn("foo")
1202+
.str.padStart(3, "_")
1203+
.rename("foo")
1204+
.toFrame();
1205+
const actual = df.select(col("foo").str.padStart(3, "_").as("foo"));
1206+
expect(actual).toFrameEqual(expected);
1207+
expect(seriesActual).toFrameEqual(expected);
1208+
});
1209+
test("padEnd", () => {
1210+
const df = pl.DataFrame({
1211+
foo: ["a", "b", "cow", "longer"],
1212+
});
1213+
const expected = pl.DataFrame({
1214+
foo: ["a__", "b__", "cow", "longer"],
1215+
});
1216+
const seriesActual = df
1217+
.getColumn("foo")
1218+
.str.padEnd(3, "_")
1219+
.rename("foo")
1220+
.toFrame();
1221+
const actual = df.select(col("foo").str.padEnd(3, "_").as("foo"));
1222+
expect(actual).toFrameEqual(expected);
1223+
expect(seriesActual).toFrameEqual(expected);
1224+
});
1225+
test("zFill", () => {
1226+
const df = pl.DataFrame({
1227+
foo: ["a", "b", "cow", "longer"],
1228+
});
1229+
const expected = pl.DataFrame({
1230+
foo: ["00a", "00b", "cow", "longer"],
1231+
});
1232+
const seriesActual = df
1233+
.getColumn("foo")
1234+
.str.zFill(3)
1235+
.rename("foo")
1236+
.toFrame();
1237+
const actual = df.select(col("foo").str.zFill(3).as("foo"));
1238+
expect(actual).toFrameEqual(expected);
1239+
expect(seriesActual).toFrameEqual(expected);
1240+
});
11931241
test("hex encode", () => {
11941242
const df = pl.DataFrame({
11951243
original: ["foo", "bar", null],

__tests__/lazyframe.test.ts

+39
Original file line numberDiff line numberDiff line change
@@ -1042,4 +1042,43 @@ describe("lazyframe", () => {
10421042
});
10431043
expect(actual).toFrameEqual(expected);
10441044
});
1045+
test("str:padStart", () => {
1046+
const actual = pl.DataFrame({
1047+
"ham": ["a", "b", "c"]
1048+
}).lazy()
1049+
.withColumn(
1050+
pl.col("ham").str.padStart(3, "-")
1051+
)
1052+
.collectSync();
1053+
const expected = pl.DataFrame({
1054+
"ham": ["--a", "--b", "--c"]
1055+
});
1056+
expect(actual).toFrameEqual(expected);
1057+
});
1058+
test("str:padEnd", () => {
1059+
const actual = pl.DataFrame({
1060+
"ham": ["a", "b", "c"]
1061+
}).lazy()
1062+
.withColumn(
1063+
pl.col("ham").str.padEnd(3, "-")
1064+
)
1065+
.collectSync();
1066+
const expected = pl.DataFrame({
1067+
"ham": ["a--", "b--", "c--"]
1068+
});
1069+
expect(actual).toFrameEqual(expected);
1070+
});
1071+
test("str:zFill", () => {
1072+
const actual = pl.DataFrame({
1073+
"ham": ["a", "b", "c"]
1074+
}).lazy()
1075+
.withColumn(
1076+
pl.col("ham").str.zFill(3)
1077+
)
1078+
.collectSync();
1079+
const expected = pl.DataFrame({
1080+
"ham": ["00a", "00b", "00c"]
1081+
});
1082+
expect(actual).toFrameEqual(expected);
1083+
});
10451084
});

polars/lazy/expr/string.ts

+116-9
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ export interface ExprString {
5151
* └─────────┘
5252
* ```
5353
*/
54-
decode(encoding: "hex" | "base64", strict?: boolean): Expr
55-
decode(options: {encoding: "hex" | "base64", strict?: boolean}): Expr
54+
decode(encoding: "hex" | "base64", strict?: boolean): Expr;
55+
decode(options: { encoding: "hex" | "base64"; strict?: boolean }): Expr;
5656
/**
5757
* Encodes a value using the provided encoding
5858
* @param encoding - hex | base64
@@ -74,7 +74,7 @@ export interface ExprString {
7474
* └─────────┘
7575
* ```
7676
*/
77-
encode(encoding: "hex" | "base64"): Expr
77+
encode(encoding: "hex" | "base64"): Expr;
7878
/**
7979
* Extract the target capture group from provided patterns.
8080
* @param pattern A valid regex pattern
@@ -140,7 +140,7 @@ export interface ExprString {
140140
/** Get length of the string values in the Series. */
141141
lengths(): Expr;
142142
/** Remove leading whitespace. */
143-
lstrip(): Expr
143+
lstrip(): Expr;
144144
/** Replace first regex match with a string value. */
145145
replace(pat: string | RegExp, val: string): Expr;
146146
/** Replace all regex matches with a string value. */
@@ -150,7 +150,105 @@ export interface ExprString {
150150
/** Modify the strings to their uppercase equivalent. */
151151
toUpperCase(): Expr;
152152
/** Remove trailing whitespace. */
153-
rstrip(): Expr
153+
rstrip(): Expr;
154+
/**
155+
* Add a leading fillChar to a string until string length is reached.
156+
* If string is longer or equal to given length no modifications will be done
157+
* @param {number} length - of the final string
158+
* @param {string} fillChar - that will fill the string.
159+
* @note If a string longer than 1 character is provided only the first character will be used
160+
* @example
161+
* ```
162+
* > df = pl.DataFrame({
163+
* ... 'foo': [
164+
* ... "a",
165+
* ... "b",
166+
* ... "LONG_WORD",
167+
* ... "cow"
168+
* ... ]})
169+
* > df.select(pl.col('foo').str.padStart(3, "_"))
170+
* shape: (4, 1)
171+
* ┌──────────┐
172+
* │ foo │
173+
* │ -------- │
174+
* │ str │
175+
* ╞══════════╡
176+
* │ __a │
177+
* ├╌╌╌╌╌╌╌╌╌╌┤
178+
* │ __b │
179+
* ├╌╌╌╌╌╌╌╌╌╌┤
180+
* │ LONG_WORD│
181+
* ├╌╌╌╌╌╌╌╌╌╌┤
182+
* │ cow │
183+
* └──────────┘
184+
* ```
185+
*/
186+
padStart(length: number, fillChar: string): Expr;
187+
/**
188+
* Add leading "0" to a string until string length is reached.
189+
* If string is longer or equal to given length no modifications will be done
190+
* @param {number} length - of the final string
191+
* @see {@link padStart}
192+
* @example
193+
* ```
194+
* > df = pl.DataFrame({
195+
* ... 'foo': [
196+
* ... "a",
197+
* ... "b",
198+
* ... "LONG_WORD",
199+
* ... "cow"
200+
* ... ]})
201+
* > df.select(pl.col('foo').str.zFill(3))
202+
* shape: (4, 1)
203+
* ┌──────────┐
204+
* │ foo │
205+
* │ -------- │
206+
* │ str │
207+
* ╞══════════╡
208+
* │ 00a │
209+
* ├╌╌╌╌╌╌╌╌╌╌┤
210+
* │ 00b │
211+
* ├╌╌╌╌╌╌╌╌╌╌┤
212+
* │ LONG_WORD│
213+
* ├╌╌╌╌╌╌╌╌╌╌┤
214+
* │ cow │
215+
* └──────────┘
216+
* ```
217+
*/
218+
zFill(length: number): Expr;
219+
/**
220+
* Add a trailing fillChar to a string until string length is reached.
221+
* If string is longer or equal to given length no modifications will be done
222+
* @param {number} length - of the final string
223+
* @param {string} fillChar - that will fill the string.
224+
* @note If a string longer than 1 character is provided only the first character will be used
225+
* @example
226+
* ```
227+
* > df = pl.DataFrame({
228+
* ... 'foo': [
229+
* ... "a",
230+
* ... "b",
231+
* ... "LONG_WORD",
232+
* ... "cow"
233+
* ... ]})
234+
* > df.select(pl.col('foo').str.padEnd(3, "_"))
235+
* shape: (4, 1)
236+
* ┌──────────┐
237+
* │ foo │
238+
* │ -------- │
239+
* │ str │
240+
* ╞══════════╡
241+
* │ a__ │
242+
* ├╌╌╌╌╌╌╌╌╌╌┤
243+
* │ b__ │
244+
* ├╌╌╌╌╌╌╌╌╌╌┤
245+
* │ LONG_WORD│
246+
* ├╌╌╌╌╌╌╌╌╌╌┤
247+
* │ cow │
248+
* └──────────┘
249+
* ```
250+
*/
251+
padEnd(length: number, fillChar: string): Expr;
154252
/**
155253
* Create subslices of the string values of a Utf8 Series.
156254
* @param start - Start of the slice (negative indexing may be used).
@@ -162,16 +260,16 @@ export interface ExprString {
162260
* @param separator — A string that identifies character or characters to use in separating the string.
163261
* @param inclusive Include the split character/string in the results
164262
*/
165-
split(by: string, options?: {inclusive?: boolean} | boolean): Expr
263+
split(by: string, options?: { inclusive?: boolean } | boolean): Expr;
166264
/** Remove leading and trailing whitespace. */
167-
strip(): Expr
265+
strip(): Expr;
168266
/**
169267
* Parse a Series of dtype Utf8 to a Date/Datetime Series.
170268
* @param datatype Date or Datetime.
171269
* @param fmt formatting syntax. [Read more](https://docs.rs/chrono/0.4.19/chrono/format/strftime/index.html)
172270
*/
173-
strptime(datatype: DataType.Date, fmt?: string): Expr
174-
strptime(datatype: DataType.Datetime, fmt?: string): Expr
271+
strptime(datatype: DataType.Date, fmt?: string): Expr;
272+
strptime(datatype: DataType.Datetime, fmt?: string): Expr;
175273
}
176274

177275
export const ExprStringFunctions = (_expr: any): ExprString => {
@@ -235,6 +333,15 @@ export const ExprStringFunctions = (_expr: any): ExprString => {
235333
rstrip() {
236334
return wrap("strRstrip");
237335
},
336+
padStart(length: number, fillChar: string){
337+
return wrap("strPadStart", length, fillChar);
338+
},
339+
zFill(length: number) {
340+
return wrap("strZFill", length);
341+
},
342+
padEnd(length: number, fillChar: string) {
343+
return wrap("strPadEnd", length, fillChar);
344+
},
238345
slice(start: number, length?: number) {
239346
return wrap("strSlice", start, length);
240347
},

polars/series/string.ts

+75
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,72 @@ export interface StringFunctions {
128128
lengths(): Series
129129
/** Remove leading whitespace. */
130130
lstrip(): Series
131+
/**
132+
* Add a leading fillChar to a string until string length is reached.
133+
* If string is longer or equal to given length no modifications will be done
134+
* @param {number} length - of the final string
135+
* @param {string} fillChar - that will fill the string.
136+
* @note If a string longer than 1 character is provided only the first character will be used
137+
* @example
138+
* ```
139+
* > df = pl.DataFrame({
140+
* ... 'foo': [
141+
* ... "a",
142+
* ... "b",
143+
* ... "LONG_WORD",
144+
* ... "cow"
145+
* ... ]})
146+
* > df.select(pl.col('foo').str.padStart(3, "_"))
147+
* shape: (4, 1)
148+
* ┌──────────┐
149+
* │ foo │
150+
* │ -------- │
151+
* │ str │
152+
* ╞══════════╡
153+
* │ __a │
154+
* ├╌╌╌╌╌╌╌╌╌╌┤
155+
* │ __b │
156+
* ├╌╌╌╌╌╌╌╌╌╌┤
157+
* │ LONG_WORD│
158+
* ├╌╌╌╌╌╌╌╌╌╌┤
159+
* │ cow │
160+
* └──────────┘
161+
* ```
162+
*/
163+
padStart(length: number, fillChar: string): Series
164+
/**
165+
* Add a leading '0' to a string until string length is reached.
166+
* If string is longer or equal to given length no modifications will be done
167+
* @param {number} length - of the final string
168+
* @example
169+
* ```
170+
* > df = pl.DataFrame({
171+
* ... 'foo': [
172+
* ... "a",
173+
* ... "b",
174+
* ... "LONG_WORD",
175+
* ... "cow"
176+
* ... ]})
177+
* > df.select(pl.col('foo').str.zFill(3))
178+
* shape: (4, 1)
179+
* ┌──────────┐
180+
* │ foo │
181+
* │ -------- │
182+
* │ str │
183+
* ╞══════════╡
184+
* │ 00a │
185+
* ├╌╌╌╌╌╌╌╌╌╌┤
186+
* │ 00b │
187+
* ├╌╌╌╌╌╌╌╌╌╌┤
188+
* │ LONG_WORD│
189+
* ├╌╌╌╌╌╌╌╌╌╌┤
190+
* │ cow │
191+
* └──────────┘
192+
* ```
193+
*/
194+
zFill(length: number): Series
195+
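/** Add a trailing fillChar to a string until the given length is reached. If the string is longer than or equal to the given length, no modifications will be done. */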
196+
padEnd(length: number, fillChar: string): Series
131197
/**
132198
* Replace first regex match with a string value.
133199
* @param pattern A valid regex pattern
@@ -236,6 +302,15 @@ export const StringFunctions = (_s: any): StringFunctions => {
236302
lstrip() {
237303
return wrap("strReplace", /^\s*/.source, "");
238304
},
305+
padStart(length: number, fillChar: string) {
306+
return wrap("strPadStart", length, fillChar);
307+
},
308+
zFill(length: number) {
309+
return wrap("strZFill", length);
310+
},
311+
padEnd(length: number, fillChar: string) {
312+
return wrap("strPadEnd", length, fillChar);
313+
},
239314
replace(pat: RegExp, val: string) {
240315
return wrap("strReplace", regexToString(pat), val);
241316
},

0 commit comments

Comments
 (0)