Skip to content

Commit 5624f6a

Browse files
committed
bump-dependencies
1 parent 93c2080 commit 5624f6a

File tree

11 files changed

+74
-50
lines changed

11 files changed

+74
-50
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@ mvn clean install -DskipTests
1111
```
1212

1313
## Command line
14+
https://spark.apache.org/docs/latest/submitting-applications.html
1415
```
15-
spark-submit --master "local[*]" --class org.csuc.cli.App --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:0.13.7,com.typesafe:config:1.3.4 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
16+
spark-submit --master "local[*]" --class org.csuc.cli.App --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
1617
```
1718
```
1819
Usage: prc-cerif [-fhV] -i=<PATH> [-o=<PATH>] -r=<STRING>

euroCRIS-cerif-definitions/pom.xml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,28 @@
2020
<dependency>
2121
<groupId>junit</groupId>
2222
<artifactId>junit</artifactId>
23+
<version>4.13.2</version>
2324
</dependency>
2425

2526
<!-- https://mvnrepository.com/artifact/javax.xml.bind/jaxb-api -->
2627
<dependency>
2728
<groupId>javax.xml.bind</groupId>
2829
<artifactId>jaxb-api</artifactId>
29-
<version>2.3.1</version>
30+
<version>2.4.0-b180830.0359</version>
3031
</dependency>
3132

3233
<!-- https://mvnrepository.com/artifact/com.sun.xml.bind/jaxb-core -->
3334
<dependency>
3435
<groupId>com.sun.xml.bind</groupId>
3536
<artifactId>jaxb-core</artifactId>
36-
<version>2.3.0.1</version>
37+
<version>3.0.1</version>
3738
</dependency>
3839

3940
<!-- https://mvnrepository.com/artifact/com.sun.xml.bind/jaxb-impl -->
4041
<dependency>
4142
<groupId>com.sun.xml.bind</groupId>
4243
<artifactId>jaxb-impl</artifactId>
43-
<version>2.3.2</version>
44+
<version>3.0.1</version>
4445
</dependency>
4546

4647
</dependencies>

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
<maven.compiler.testSource>1.8</maven.compiler.testSource>
2323
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
2424

25-
<revision>2.4.17</revision>
25+
<revision>2.4.18</revision>
2626

2727
<junit.version>4.12</junit.version>
2828
<picocli.version>4.6.2</picocli.version>

transformation/pom.xml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,14 @@
1717
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
1818

1919
<spark.version>3.2.0</spark.version>
20-
<spark.excel.version>0.13.7</spark.excel.version>
20+
<spark.excel.version>3.2.0_0.16.0</spark.excel.version>
2121
</properties>
2222

2323
<dependencies>
2424
<dependency>
2525
<groupId>junit</groupId>
2626
<artifactId>junit</artifactId>
27-
<scope>test</scope>
27+
<version>4.13.2</version>
2828
</dependency>
2929

3030
<dependency>
@@ -36,6 +36,7 @@
3636
<dependency>
3737
<groupId>com.typesafe</groupId>
3838
<artifactId>config</artifactId>
39+
<version>1.4.1</version>
3940
</dependency>
4041

4142
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->

transformation/src/main/java/org/csuc/cli/App.java

Lines changed: 48 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
import java.nio.file.Path;
1818
import java.time.Instant;
1919
import java.time.format.DateTimeFormatter;
20-
import java.util.ArrayList;
21-
import java.util.List;
2220
import java.util.Objects;
21+
import java.util.concurrent.CopyOnWriteArrayList;
2322

2423
import static org.apache.spark.sql.functions.*;
2524

@@ -73,6 +72,7 @@ public void run() {
7372
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
7473
.option("dataAddress", String.format("'%s'!A1", SHEETS.researchers.value())) // Optional, default: "A1"
7574
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
75+
.option("maxRowsInMemory", 20)
7676
.option("header", "true")
7777
.load(input.toString())
7878
.toDF("_c0", "_c1", "_c2", "_c3")
@@ -87,6 +87,7 @@ public void run() {
8787
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
8888
.option("dataAddress", String.format("'%s'!A1", SHEETS.departments.value())) // Optional, default: "A1"
8989
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
90+
.option("maxRowsInMemory", 20)
9091
.option("header", "true")
9192
.load(input.toString())
9293
.toDF("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6")
@@ -100,7 +101,8 @@ public void run() {
100101
.read()
101102
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
102103
.option("dataAddress", String.format("'%s'!A1", SHEETS.departments_relations.value())) // Optional, default: "A1"
103-
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
104+
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
105+
.option("maxRowsInMemory", 20)
104106
.option("header", "true")
105107
.load(input.toString())
106108
.toDF("_c0", "_c1")
@@ -123,6 +125,7 @@ public void run() {
123125
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
124126
.option("dataAddress", String.format("'%s'!A1", SHEETS.research_groups.value())) // Optional, default: "A1"
125127
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
128+
.option("maxRowsInMemory", 20)
126129
.option("header", "true")
127130
.load(input.toString())
128131
.toDF("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6")
@@ -137,6 +140,7 @@ public void run() {
137140
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
138141
.option("dataAddress", String.format("'%s'!A1", SHEETS.research_groups_relations.value())) // Optional, default: "A1"
139142
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
143+
.option("maxRowsInMemory", 20)
140144
.option("header", "true")
141145
.load(input.toString())
142146
.toDF("_c0", "_c1", "_c2", "_c3")
@@ -161,6 +165,7 @@ public void run() {
161165
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
162166
.option("dataAddress", String.format("'%s'!A1", SHEETS.projects.value())) // Optional, default: "A1"
163167
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
168+
.option("maxRowsInMemory", 20)
164169
.option("header", "true")
165170
.load(input.toString())
166171
.toDF("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6")
@@ -175,6 +180,7 @@ public void run() {
175180
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
176181
.option("dataAddress", String.format("'%s'!A1", SHEETS.projects_relations.value())) // Optional, default: "A1"
177182
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
183+
.option("maxRowsInMemory", 20)
178184
.option("header", "true")
179185
.load(input.toString())
180186
.toDF("_c0", "_c1", "_c2", "_c3")
@@ -198,6 +204,7 @@ public void run() {
198204
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
199205
.option("dataAddress", String.format("'%s'!A1", SHEETS.publications.value())) // Optional, default: "A1"
200206
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
207+
.option("maxRowsInMemory", 20)
201208
.option("header", "true")
202209
.load(input.toString())
203210
.toDF("_c0", "_c1", "_c2", "_c3", "_c4", "_c5", "_c6", "_c7", "_c8", "_c9", "_c10", "_c11", "_c12", "_c13", "_c14")
@@ -212,6 +219,7 @@ public void run() {
212219
.format("com.crealytics.spark.excel") // Or .format("excel") for V2 implementation
213220
.option("dataAddress", String.format("'%s'!A1", SHEETS.publication_relations.value())) // Optional, default: "A1"
214221
.option("treatEmptyValuesAsNulls", "false") // Optional, default: true
222+
.option("maxRowsInMemory", 20)
215223
.option("header", "true")
216224
.load(input.toString())
217225
.toDF("_c0", "_c1", "_c2", "_c3")
@@ -234,43 +242,49 @@ public void run() {
234242
Dataset<Row> research_groups_join = research_groups.join(research_groups_relations, col("research_groups._c4").equalTo(col("research_groups_relations._c0")), "left").drop(col("research_groups_relations._c0"));
235243
Dataset<Row> publication_join = publications.join(publication_relations, col("publications._c1").equalTo(col("publication_relations._c0")), "left").drop(col("publication_relations._c0"));
236244

237-
// projects_join.show(false);
238-
// departments_join.show(false);
239-
// research_groups_join.show(false);
240-
// publication_join.show(false);
241-
242245
//CERIF
243246
Marshaller marshaller = new Marshaller(ruct);
244247

245-
List<CfPersType> cfPersTypeList = new ArrayList<>();
246-
List<CfOrgUnitType> cfOrgUnitTypeList = new ArrayList<>();
247-
List<CfProjType> cfProjTypeList = new ArrayList<>();
248-
List<CfResPublType> cfResPublTypeList = new ArrayList<>();
249-
250-
researchers.collectAsList().forEach(row -> {
251-
cfPersTypeList.add(new Researcher(row, Semantics.getClassId(ClassId.CHECKED)));
252-
});
253-
254-
departments_join.collectAsList().forEach(row -> {
255-
cfOrgUnitTypeList.add(new Department(row, Semantics.getClassId(ClassId.DEPARTMENT_OR_INSTITUTE)));
256-
});
257-
258-
research_groups_join.collectAsList().forEach(row -> {
259-
cfOrgUnitTypeList.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
260-
});
261-
262-
projects_join.collectAsList().forEach(row -> {
263-
cfProjTypeList.add(new Project(row, cfPersTypeList));
264-
});
265-
266-
publication_join.collectAsList().forEach(row -> {
267-
cfResPublTypeList.add(new Publication(row, cfPersTypeList));
268-
});
248+
CopyOnWriteArrayList<CfPersType> cfPersTypeList = new CopyOnWriteArrayList<>();
249+
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList = new CopyOnWriteArrayList<>();
250+
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList_2 = new CopyOnWriteArrayList<>();
251+
CopyOnWriteArrayList<CfProjType> cfProjTypeList = new CopyOnWriteArrayList<>();
252+
CopyOnWriteArrayList<CfResPublType> cfResPublTypeList = new CopyOnWriteArrayList<>();
253+
254+
if (researchers.count() > 0) {
255+
researchers.collectAsList().forEach(row -> {
256+
cfPersTypeList.add(new Researcher(row, Semantics.getClassId(ClassId.CHECKED)));
257+
});
258+
}
259+
260+
if (departments_join.count() > 0) {
261+
departments_join.collectAsList().forEach(row -> {
262+
cfOrgUnitTypeList.add(new Department(row, Semantics.getClassId(ClassId.DEPARTMENT_OR_INSTITUTE)));
263+
});
264+
}
265+
266+
if (research_groups_join.count() > 0) {
267+
research_groups_join.collectAsList().forEach(row -> {
268+
cfOrgUnitTypeList_2.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
269+
});
270+
}
271+
272+
if (projects_join.count() > 0) {
273+
projects_join.collectAsList().forEach(row -> {
274+
cfProjTypeList.add(new Project(row, cfPersTypeList));
275+
});
276+
}
277+
278+
if (publication_join.count() > 0) {
279+
publication_join.collectAsList().forEach(row -> {
280+
cfResPublTypeList.add(new Publication(row, cfPersTypeList));
281+
});
282+
}
269283

270284
if (Objects.isNull(output))
271-
marshaller.buld(String.format("/tmp/%s.xml", ruct), formatted, cfPersTypeList, cfOrgUnitTypeList, cfProjTypeList, cfResPublTypeList);
285+
marshaller.build(String.format("/tmp/%s.xml", ruct), formatted, cfPersTypeList, cfOrgUnitTypeList, cfProjTypeList, cfResPublTypeList);
272286
else
273-
marshaller.buld(output.toString(), formatted, cfPersTypeList, cfOrgUnitTypeList, cfProjTypeList, cfResPublTypeList);
287+
marshaller.build(output.toString(), formatted, cfPersTypeList, cfOrgUnitTypeList, cfProjTypeList, cfResPublTypeList);
274288

275289

276290
sparkSession.log().info("Saved output {}", Objects.isNull(output) ? String.format("/tmp/%s.xml", ruct) : output);

transformation/src/main/java/org/csuc/marshal/Department.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,13 @@
66
import org.csuc.typesafe.semantics.Semantics;
77
import xmlns.org.eurocris.cerif_1.*;
88

9+
import java.io.Serializable;
910
import java.util.List;
1011
import java.util.Objects;
1112
import java.util.UUID;
1213
import java.util.stream.Stream;
1314

14-
public class Department extends CfOrgUnitType {
15+
public class Department extends CfOrgUnitType implements Serializable {
1516

1617
private ObjectFactory FACTORY = new ObjectFactory();
1718
private Row row;
@@ -114,8 +115,10 @@ private void createRelationCfPers() {
114115
List<Row> relations = row.getList(8);
115116

116117
relations.forEach(relation -> {
117-
if (Objects.nonNull(relation.getAs(2))) {
118-
researcher(relation.getAs(2));
118+
if(relations.size() == 2){
119+
if (Objects.nonNull(relation.getAs(2))) {
120+
researcher(relation.getAs(2));
121+
}
119122
}
120123
});
121124
}

transformation/src/main/java/org/csuc/marshal/Marshaller.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ public Marshaller(String ruct) throws DatatypeConfigurationException {
2525
cerif.setSourceDatabase(ruct);
2626
}
2727

28-
public void buld(String output, boolean formatted, List... objects) throws JAXBException, FileNotFoundException {
28+
public void build(String output, boolean formatted, List... objects) throws JAXBException, FileNotFoundException {
2929
Arrays.stream(objects).forEach(o -> {
3030
cerif.getCfClassOrCfClassSchemeOrCfClassSchemeDescr().addAll(o);
3131
});
@@ -34,7 +34,7 @@ public void buld(String output, boolean formatted, List... objects) throws JAXBE
3434
javax.xml.bind.Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
3535

3636
jaxbMarshaller.setProperty(javax.xml.bind.Marshaller.JAXB_ENCODING, StandardCharsets.UTF_8.toString());
37-
jaxbMarshaller.setProperty(javax.xml.bind.Marshaller.JAXB_FORMATTED_OUTPUT, false);
37+
jaxbMarshaller.setProperty(javax.xml.bind.Marshaller.JAXB_FORMATTED_OUTPUT, formatted);
3838
jaxbMarshaller.setProperty(javax.xml.bind.Marshaller.JAXB_FRAGMENT, false);
3939

4040
jaxbMarshaller.marshal(cerif, new FileOutputStream(output));

transformation/src/main/java/org/csuc/marshal/Project.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import org.csuc.typesafe.semantics.Semantics;
99
import xmlns.org.eurocris.cerif_1.*;
1010

11+
import java.io.Serializable;
1112
import java.util.List;
1213
import java.util.Objects;
1314
import java.util.UUID;
@@ -17,7 +18,7 @@
1718
* | Títol| URL| Codi oficial| Codi intern| Programa| Data inici|Data fi| uuid|
1819
* +--------------------+--------------------+--------------+--------------------+-------------------+-------------------+-------+--------------------+
1920
*/
20-
public class Project extends CfProjType {
21+
public class Project extends CfProjType implements Serializable {
2122
private ObjectFactory FACTORY = new ObjectFactory();
2223

2324
private Row row;

transformation/src/main/java/org/csuc/marshal/Publication.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,13 @@
99
import org.csuc.utils.DocumentTypes;
1010
import xmlns.org.eurocris.cerif_1.*;
1111

12+
import java.io.Serializable;
1213
import java.util.List;
1314
import java.util.Objects;
1415
import java.util.UUID;
1516
import java.util.stream.Stream;
1617

17-
public class Publication extends CfResPublType {
18+
public class Publication extends CfResPublType implements Serializable {
1819

1920
private ObjectFactory FACTORY = new ObjectFactory();
2021
private Row row;

transformation/src/main/java/org/csuc/marshal/ResearchGroup.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
import org.csuc.typesafe.semantics.Semantics;
99
import xmlns.org.eurocris.cerif_1.*;
1010

11+
import java.io.Serializable;
1112
import java.util.List;
1213
import java.util.Objects;
1314
import java.util.UUID;
1415
import java.util.stream.Stream;
1516

16-
public class ResearchGroup extends CfOrgUnitType {
17+
public class ResearchGroup extends CfOrgUnitType implements Serializable {
1718

1819
private ObjectFactory FACTORY = new ObjectFactory();
1920
private Row row;

0 commit comments

Comments
 (0)