Skip to content

Commit c7747d6

Browse files
Merge pull request #95 from CSUC/spark-excel
Spark excel
2 parents 5624f6a + 348ac05 commit c7747d6

File tree

8 files changed

+25
-28
lines changed

8 files changed

+25
-28
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ mvn clean install -DskipTests
1313
## Command line
1414
https://spark.apache.org/docs/latest/submitting-applications.html
1515
```
16-
spark-submit --master "local[*]" --class org.csuc.cli.App --packages info.picocli:picocli:4.6.2,com.crealytics:spark-excel_2.12:3.2.0_0.16.0,com.typesafe:config:1.4.1 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
16+
spark-submit --master "local[*]" --class org.csuc.cli.Cerif --packages info.picocli:picocli:4.6.3,com.crealytics:spark-excel_2.12:3.2.1_0.16.4,com.typesafe:config:1.4.2 --jars euroCRIS-cerif-definitions-1.6.2.jar prc-cerif-${version}.jar args
1717
```
1818
```
1919
Usage: prc-cerif [-fhV] -i=<PATH> [-o=<PATH>] -r=<STRING>

pom.xml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@
2525
<revision>2.4.18</revision>
2626

2727
<junit.version>4.12</junit.version>
28-
<picocli.version>4.6.2</picocli.version>
29-
<typesafe.config.version>1.3.4</typesafe.config.version>
28+
<picocli.version>4.6.3</picocli.version>
29+
<typesafe.config.version>1.4.2</typesafe.config.version>
3030
</properties>
3131

3232
<dependencyManagement>

transformation/pom.xml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
1818

1919
<spark.version>3.2.0</spark.version>
20-
<spark.excel.version>3.2.0_0.16.0</spark.excel.version>
20+
<spark.excel.version>3.2.1_0.16.4</spark.excel.version>
2121
</properties>
2222

2323
<dependencies>
@@ -36,7 +36,6 @@
3636
<dependency>
3737
<groupId>com.typesafe</groupId>
3838
<artifactId>config</artifactId>
39-
<version>1.4.1</version>
4039
</dependency>
4140

4241
<!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->

transformation/src/main/java/org/csuc/cli/App.java renamed to transformation/src/main/java/org/csuc/cli/Cerif.java

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@
3434
"\tOS: ${os.name} ${os.version} ${os.arch}"
3535
}
3636
)
37-
public class App implements Runnable {
37+
public class Cerif implements Runnable {
3838
@CommandLine.Spec
3939
CommandLine.Model.CommandSpec spec;
4040

@@ -53,7 +53,7 @@ public class App implements Runnable {
5353
private Instant inici;
5454

5555
public static void main(String[] args) {
56-
CommandLine cmd = new CommandLine(new App());
56+
CommandLine cmd = new CommandLine(new Cerif());
5757
if (args.length == 0) cmd.usage(System.out);
5858
else cmd.execute(args);
5959
}
@@ -242,12 +242,13 @@ public void run() {
242242
Dataset<Row> research_groups_join = research_groups.join(research_groups_relations, col("research_groups._c4").equalTo(col("research_groups_relations._c0")), "left").drop(col("research_groups_relations._c0"));
243243
Dataset<Row> publication_join = publications.join(publication_relations, col("publications._c1").equalTo(col("publication_relations._c0")), "left").drop(col("publication_relations._c0"));
244244

245+
//departments_join.write().parquet("/tmp/departments_join");
246+
245247
//CERIF
246248
Marshaller marshaller = new Marshaller(ruct);
247249

248250
CopyOnWriteArrayList<CfPersType> cfPersTypeList = new CopyOnWriteArrayList<>();
249251
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList = new CopyOnWriteArrayList<>();
250-
CopyOnWriteArrayList<CfOrgUnitType> cfOrgUnitTypeList_2 = new CopyOnWriteArrayList<>();
251252
CopyOnWriteArrayList<CfProjType> cfProjTypeList = new CopyOnWriteArrayList<>();
252253
CopyOnWriteArrayList<CfResPublType> cfResPublTypeList = new CopyOnWriteArrayList<>();
253254

@@ -265,7 +266,7 @@ public void run() {
265266

266267
if (research_groups_join.count() > 0) {
267268
research_groups_join.collectAsList().forEach(row -> {
268-
cfOrgUnitTypeList_2.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
269+
cfOrgUnitTypeList.add(new ResearchGroup(row, Semantics.getClassId(ClassId.RESEARCH_GROUP), cfPersTypeList));
269270
});
270271
}
271272

transformation/src/main/java/org/csuc/marshal/Department.java

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -114,13 +114,11 @@ private void createRelationCfPers() {
114114
if (Objects.nonNull(row.getAs(8))) {
115115
List<Row> relations = row.getList(8);
116116

117-
relations.forEach(relation -> {
118-
if(relations.size() == 2){
119-
if (Objects.nonNull(relation.getAs(2))) {
120-
researcher(relation.getAs(2));
121-
}
122-
}
123-
});
117+
if(!relations.isEmpty()){
118+
relations.forEach(relation -> {
119+
if(Objects.nonNull(relation.get(1))) researcher(relation.getString(1));
120+
});
121+
}
124122
}
125123
}
126124

transformation/src/main/java/org/csuc/marshal/Project.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -124,11 +124,10 @@ private void createRelationCfPers() {
124124
private void researcher(String id, String ip) {
125125
CfProjType.CfProjPers pers = new CfProjType.CfProjPers();
126126
pers.setCfPersId(id);
127-
if (ip.toLowerCase().equals("si")
128-
|| ip.toLowerCase().equals("s"))
129-
pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR));
130-
else if (ip.toLowerCase().equals("no")
131-
|| ip.toLowerCase().equals("n")) pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR));
127+
128+
if(Objects.nonNull(ip) && (ip.toLowerCase().equals("si")
129+
|| ip.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.PRINCIPAL_INVESTIGATOR));
130+
else pers.setCfClassId(Semantics.getClassId(ClassId.CO_INVESTIGATOR));
132131

133132
pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROJECT_ENGAGEMENTS));
134133
getCfTitleOrCfAbstrOrCfKeyw().add(FACTORY.createCfProjTypeCfProjPers(pers));

transformation/src/main/java/org/csuc/marshal/Publication.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -176,8 +176,9 @@ private void createRelationCfPers() {
176176
private void researcher(String id, String direccio) {
177177
CfResPublType.CfPersResPubl pers = new CfResPublType.CfPersResPubl();
178178
pers.setCfPersId(id);
179-
if (direccio.toLowerCase().equals("si")
180-
|| direccio.toLowerCase().equals("s")) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR));
179+
180+
if(Objects.nonNull(direccio) && (direccio.toLowerCase().equals("si")
181+
|| direccio.toLowerCase().equals("s"))) pers.setCfClassId(Semantics.getClassId(ClassId.DISS_DIRECTOR));
181182
else pers.setCfClassId(Semantics.getClassId(ClassId.AUTHOR));
182183

183184
pers.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_PROFESSIONAL_RELATIONSHIPS));

transformation/src/main/java/org/csuc/marshal/ResearchGroup.java

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -136,11 +136,10 @@ private void createRelationCfPers() {
136136
private void researcher(String id, String interve) {
137137
CfOrgUnitType.CfPersOrgUnit persOrgUnit = new CfOrgUnitType.CfPersOrgUnit();
138138
persOrgUnit.setCfPersId(id);
139-
if (interve.toLowerCase().equals("si")
140-
|| interve.toLowerCase().equals("s"))
141-
persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER));
142-
else if (interve.toLowerCase().equals("no")
143-
|| interve.toLowerCase().equals("n")) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER));
139+
140+
if(Objects.nonNull(interve) && (interve.toLowerCase().equals("si")
141+
|| interve.toLowerCase().equals("s"))) persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.GROUP_LEADER));
142+
else persOrgUnit.setCfClassId(Semantics.getClassId(ClassId.MEMBER));
144143

145144
persOrgUnit.setCfClassSchemeId(Semantics.getSchemaId(SchemeId.PERSON_ORGANISATION_ROLES));
146145
getCfNameOrCfResActOrCfKeyw().add(FACTORY.createCfOrgUnitTypeCfPersOrgUnit(persOrgUnit));

0 commit comments

Comments
 (0)