Skip to content

Commit 07ddd0d

Browse files
committed
CDAP-20934 - adding new param to configure whether string values need to be converted or not
1 parent 7042ab9 commit 07ddd0d

File tree

3 files changed

+68
-2
lines changed

3 files changed

+68
-2
lines changed

wrangler-core/src/main/java/io/cdap/directives/xml/XmlToJson.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import io.cdap.wrangler.api.parser.Numeric;
3939
import io.cdap.wrangler.api.parser.TokenType;
4040
import io.cdap.wrangler.api.parser.UsageDefinition;
41+
import org.apache.commons.lang.StringUtils;
4142
import org.json.JSONException;
4243
import org.json.XML;
4344

@@ -52,16 +53,19 @@
5253
@Description("Parses a XML document to JSON representation.")
5354
public class XmlToJson implements Directive, Lineage {
5455
public static final String NAME = "parse-xml-to-json";
56+
public static final String ARG_KEEP_STRING = "keep-string";
5557
// Column within the input row that needs to be parsed as Json
5658
private String col;
5759
private int depth;
60+
private boolean keepString;
5861
private final Gson gson = new Gson();
5962

6063
@Override
6164
public UsageDefinition define() {
6265
UsageDefinition.Builder builder = UsageDefinition.builder(NAME);
6366
builder.define("column", TokenType.COLUMN_NAME);
6467
builder.define("depth", TokenType.NUMERIC, Optional.TRUE);
68+
builder.define(ARG_KEEP_STRING, TokenType.BOOLEAN, Optional.TRUE);
6569
return builder.build();
6670
}
6771

@@ -73,6 +77,12 @@ public void initialize(Arguments args) throws DirectiveParseException {
7377
} else {
7478
this.depth = Integer.MAX_VALUE;
7579
}
80+
81+
if (args.contains(ARG_KEEP_STRING) &&
82+
StringUtils.isNotEmpty(args.value(ARG_KEEP_STRING).value().toString())) {
83+
this.keepString = Boolean.parseBoolean(args.value(ARG_KEEP_STRING).value().toString());
84+
}
85+
7686
}
7787

7888
@Override
@@ -93,7 +103,7 @@ public List<Row> execute(List<Row> rows, ExecutorContext context) throws Directi
93103

94104
try {
95105
if (object instanceof String) {
96-
JsonObject element = gson.fromJson(XML.toJSONObject((String) object).toString(),
106+
JsonObject element = gson.fromJson(XML.toJSONObject((String) object, this.keepString).toString(),
97107
JsonElement.class).getAsJsonObject();
98108
JsParser.jsonFlatten(element, col, 1, depth, row);
99109
row.remove(idx);
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright © 2024 Cask Data, Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
5+
* use this file except in compliance with the License. You may obtain a copy of
6+
* the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12+
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13+
* License for the specific language governing permissions and limitations under
14+
* the License.
15+
*/
16+
17+
package io.cdap.directives.parser;
18+
19+
import io.cdap.directives.xml.XmlToJson;
20+
import io.cdap.wrangler.TestingRig;
21+
import io.cdap.wrangler.api.Row;
22+
import org.junit.Assert;
23+
import org.junit.Test;
24+
25+
import java.util.Arrays;
26+
import java.util.List;
27+
28+
/**
29+
* Tests {@link XmlToJson}
30+
*/
31+
public class XmlToJsonTest {
32+
@Test
33+
public void testAutoConversionOfStringField() throws Exception {
34+
String[] directives = new String[] {
35+
"copy body body_1 true",
36+
"copy body body_2 true",
37+
"copy body body_3 true",
38+
"parse-xml-to-json body_1 1",
39+
"parse-xml-to-json body_2 1 false",
40+
"parse-xml-to-json body_3 1 true"
41+
};
42+
43+
List<Row> rows = Arrays.asList(
44+
new Row("body",
45+
"<?xml version=\"1.0\" encoding=\"UTF-8\" ?><Data><tagid>303246306303E8</tagid></Data>")
46+
);
47+
48+
rows = TestingRig.execute(directives, rows);
49+
Assert.assertEquals(1, rows.size());
50+
Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_1_Data").toString());
51+
Assert.assertEquals("{\"tagid\":3.03246306303E19}", rows.get(0).getValue("body_2_Data").toString());
52+
Assert.assertEquals("{\"tagid\":\"303246306303E8\"}", rows.get(0).getValue("body_3_Data").toString());
53+
}
54+
}

wrangler-docs/directives/parse-xml-to-json.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ transforms the XML into a JSON document, simplifying further parsing using the
88

99
## Syntax
1010
```
11-
parse-xml-to-json <column-name> [<depth>]
11+
parse-xml-to-json <column-name> [<depth>] [<keep-strings>]
1212
```
1313

1414
* `<column-name>` is the name of the column in the record that is an XML document.
1515
* `<depth>` indicates the depth at which the XML document parsing should terminate processing.
16+
* `<keep-strings>` is an OPTIONAL boolean value; if `true`, values will not be coerced into boolean or numeric values and will instead be left as strings (as per `org.json.XML` rules).
17+
The default value is `false`
1618

1719

1820
## Usage Notes

0 commit comments

Comments
 (0)