AJ-1452: get schema (#15)

davidangb · web-flow · commit f843cb895c73 · 2023-11-17T08:59:54.000-05:00
add PfbReader.getPfbSchema method
diff --git a/library/src/main/java/bio/terra/pfb/PfbReader.java b/library/src/main/java/bio/terra/pfb/PfbReader.java
@@ -28,11 +28,7 @@ public class PfbReader {
 
   public static String showSchema(String fileLocation) throws IOException {
     return convertEnum(
-        readPfbSchema(fileLocation).getField("object").schema().getTypes().stream()
-            .filter(t -> !t.getName().equals("Metadata"))
-            .map(Schema::toString)
-            .toList()
-            .toString());
+        getPfbSchema(fileLocation).stream().map(Schema::toString).toList().toString());
   }
 
   public static String showNodes(String fileLocation) throws IOException {
@@ -97,6 +93,14 @@ public static Metadata getPfbMetadata(String fileLocation) throws IOException {
     throw new InvalidPfbException("Error reading PFB Metadata object");
   }
 
+  // Note that this does not decode enum values via convertEnum. WDS does not need decoding;
+  // if other clients do need it, we should add it here.
+  public static List<Schema> getPfbSchema(String fileLocation) throws IOException {
+    return readPfbSchema(fileLocation).getField("object").schema().getTypes().stream()
+        .filter(t -> !t.getName().equals("Metadata"))
+        .toList();
+  }
+
   static Schema readPfbSchema(String fileLocation) throws IOException {
     DatumReader<Entity> datumReader = new SpecificDatumReader<>(Entity.class);
     URL url = isValidUrl(fileLocation);
diff --git a/library/src/test/java/bio/terra/pfb/PfbReaderTest.java b/library/src/test/java/bio/terra/pfb/PfbReaderTest.java
@@ -3,13 +3,20 @@
 import static bio.terra.pfb.utils.CompareOutputUtils.FileExtension.JSON;
 import static bio.terra.pfb.utils.CompareOutputUtils.FileExtension.TXT;
 import static bio.terra.pfb.utils.CompareOutputUtils.PfbCommandType.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertThrows;
 
 import bio.terra.pfb.exceptions.InvalidPfbException;
 import bio.terra.pfb.utils.CompareOutputUtils;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.util.List;
+import java.util.Spliterator;
+import java.util.Spliterators;
 import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+import org.apache.avro.Schema;
 import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -38,6 +45,39 @@ void showSchemaTest(String fileName) throws IOException {
     }
   }
 
+  // This test validates the behavior of the PfbReader.getPfbSchema() method. That method is
+  // used internally by PfbReader.showSchema, which is thoroughly tested by
+  // {@link #showSchemaTest(String)}; therefore, this test only performs cursory
+  // correctness checks.
+  @ParameterizedTest
+  @MethodSource("provideTestFiles")
+  void getPfbSchemaTest(String fileName) throws IOException {
+    // TODO - remove when fix is added for AJ-1288
+    if (fileName.equals("empty")) {
+      logger.error("Skipping test file: {} until fixed in AJ-1288\n", fileName);
+    } else {
+      // read the pypfb output for this file
+      String expectedStr = CompareOutputUtils.getPyPfbOutput(fileName, SHOW_SCHEMA, JSON);
+      // parse the pypfb output for this file
+      ObjectMapper mapper = new ObjectMapper();
+      JsonNode expected = mapper.readTree(expectedStr);
+      // find the names of all top-level types from the pypfb output
+      List<String> expectedNames =
+          StreamSupport.stream(
+                  Spliterators.spliteratorUnknownSize(expected.elements(), Spliterator.ORDERED),
+                  false)
+              .map(typeNode -> typeNode.get("name").asText())
+              .toList();
+      // get the PfbReader-calculated schema for this file from PfbReader
+      List<Schema> actualSchema =
+          PfbReader.getPfbSchema(CompareOutputUtils.getAvroFilePath(fileName, ""));
+      // find the names of all top-level types in the actual schema
+      List<String> actualNames = actualSchema.stream().map(Schema::getName).toList();
+
+      assertEquals(expectedNames, actualNames);
+    }
+  }
+
   @ParameterizedTest
   @MethodSource("provideTestFiles")
   void showNodesTest(String fileName) throws IOException {
@@ -57,8 +97,7 @@ void getGenericRecordsStream(String fileName) throws IOException {
   }
 
   @Test
-  @MethodSource("provideTestFiles")
-  void getGenericRecordsStreamError() throws IOException {
+  void getGenericRecordsStreamError() {
     assertThrows(
         InvalidPfbException.class,
         () -> CompareOutputUtils.testDataStream("noFile.txt"),
diff --git a/library/src/test/java/bio/terra/pfb/utils/CompareOutputUtils.java b/library/src/test/java/bio/terra/pfb/utils/CompareOutputUtils.java
@@ -22,9 +22,7 @@ public class CompareOutputUtils {
   public static void assertJavaPfbIsPyPFB(
       String fileName, PfbCommandType commandType, String filePath, FileExtension fileExtension)
       throws IOException {
-    String pythonOutput;
-    pythonOutput =
-        Files.readString(Paths.get(getPyPfbOutputFilePath(fileName, commandType, fileExtension)));
+    String pythonOutput = getPyPfbOutput(fileName, commandType, fileExtension);
     String avroFilePath = getAvroFilePath(fileName, filePath);
     String javaPfbOutput =
         switch (commandType) {
@@ -91,14 +89,20 @@ public String toString() {
     }
   }
 
-  private static String getAvroFilePath(String fileName, String filePath) {
+  public static String getPyPfbOutput(
+      String fileName, PfbCommandType commandType, FileExtension fileExtension) throws IOException {
+    return Files.readString(
+        Paths.get(getPyPfbOutputFilePath(fileName, commandType, fileExtension)));
+  }
+
+  public static String getAvroFilePath(String fileName, String filePath) {
     if (filePath.isEmpty()) {
       filePath = String.format("src/test/resources/avro/%s.avro", fileName);
     }
     return filePath;
   }
 
-  private static String getPyPfbOutputFilePath(
+  public static String getPyPfbOutputFilePath(
       String fileName, PfbCommandType commandType, FileExtension fileExtension) {
     return String.format(
         "src/test/resources/pyPfbOutput/%s/%s.%s", commandType, fileName, fileExtension);