Skip to content

Commit 0268b8e

Browse files
Merge pull request ibi-group#473 from ibi-group/gtfsplus-handle-empty-rows
Report empty GTFS+ rows in separate validation issue
2 parents 0b27d57 + 20f5dc1 commit 0268b8e

File tree

3 files changed

+49
-11
lines changed

3 files changed

+49
-11
lines changed

src/main/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidation.java

Lines changed: 22 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import com.fasterxml.jackson.databind.JsonNode;
1010
import com.fasterxml.jackson.databind.node.ArrayNode;
1111
import org.apache.commons.io.input.BOMInputStream;
12+
import org.apache.logging.log4j.util.Strings;
1213
import org.slf4j.Logger;
1314
import org.slf4j.LoggerFactory;
1415

@@ -150,34 +151,44 @@ private static void validateTable(
150151
// Iterate over each row and validate each field value.
151152
int rowIndex = 0;
152153
int rowsWithWrongNumberOfColumns = 0;
154+
int emptyRows = 0;
153155
while (csvReader.readRecord()) {
154156
// First, check that row has the correct number of fields.
155157
int recordColumnCount = csvReader.getColumnCount();
156-
if (recordColumnCount != fieldsFound.length) {
157-
rowsWithWrongNumberOfColumns++;
158-
}
159-
// Validate each value in row. Note: we iterate over the fields and not values because a row may be missing
160-
// columns, but we still want to validate that missing value (e.g., if it is missing a required field).
161158
String[] rowValues = csvReader.getValues();
162-
for (int f = 0; f < fieldsFound.length; f++) {
163-
// If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
164-
String val = f < recordColumnCount ? rowValues[f] : null;
165-
validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, fieldsFound[f], gtfsFeed);
159+
if (recordColumnCount == 1 && Strings.isBlank(rowValues[0])) {
160+
// If row is empty (technically, the row has one column with a blank value),
161+
// report that as such (and skip validating column values).
162+
emptyRows++;
163+
} else {
164+
if (recordColumnCount != fieldsFound.length) {
165+
rowsWithWrongNumberOfColumns++;
166+
}
167+
// Validate each value in row. Note: we iterate over the fields and not values because a row may be missing
168+
// columns, but we still want to validate that missing value (e.g., if it is missing a required field).
169+
for (int f = 0; f < fieldsFound.length; f++) {
170+
// If value exists for index, use that. Otherwise, default to null to avoid out of bounds exception.
171+
String val = f < recordColumnCount ? rowValues[f] : null;
172+
validateTableValue(issues, tableId, rowIndex, rowValues, val, fieldsFound, fieldsFound[f], gtfsFeed);
173+
}
166174
}
167175
rowIndex++;
168176
}
169177
csvReader.close();
170178

171-
// Add issue for wrong number of columns after processing all rows.
179+
// Add issues for wrong number of columns and for empty rows after processing all rows.
172180
// Note: We considered adding an issue for each row, but opted for the single error approach because there's no
173181
// concept of a row-level issue in the UI right now. So we would potentially need to add that to the UI
174182
// somewhere. Also, there's the trouble of reporting the issue at the row level, but not really giving the user
175-
// a great way to resolve the issue in the GTFS+ editor. Essentially, all of the rows with the wrong number of
183+
// a great way to resolve the issue in the GTFS+ editor. Essentially, all rows with the wrong number of
176184
// columns can be resolved simply by clicking the "Save and Revalidate" button -- so the resolution is more at
177185
// the table level than the row level (like, for example, a bad value for a field would be).
178186
if (rowsWithWrongNumberOfColumns > 0) {
179187
issues.add(new ValidationIssue(tableId, null, -1, rowsWithWrongNumberOfColumns + " row(s) do not contain the same number of fields as there are headers. (File may need to be edited manually.)"));
180188
}
189+
if (emptyRows > 0) {
190+
issues.add(new ValidationIssue(tableId, null, -1, emptyRows + " row(s) are empty. (File may need to be edited manually.)"));
191+
}
181192
}
182193

183194
/** Determine if a GTFS+ spec field is required. */

src/test/java/com/conveyal/datatools/manager/gtfsplus/GtfsPlusValidationTest.java

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@ public class GtfsPlusValidationTest extends UnitTest {
3232
// Logger bound to this test class so that log output is attributed to GtfsPlusValidationTest
// (previously it was created for MergeFeedsJobTest, a copy-paste leftover).
private static final Logger LOG = LoggerFactory.getLogger(GtfsPlusValidationTest.class);
3333
private static FeedVersion bartVersion1;
3434
private static FeedVersion bartVersion1WithQuotedValues;
35+
private static FeedVersion lavtaVersion1;
3536
private static Project project;
3637
private static JsonNode routeAttributesFieldsNode;
3738

@@ -55,6 +56,11 @@ public static void setUp() throws IOException {
5556
routeAttributesFieldsNode = Objects.requireNonNull(
5657
GtfsPlusValidation.findNode(DataManager.gtfsPlusConfig, "id", "route_attributes")
5758
).get("fields");
59+
60+
FeedSource lavta = new FeedSource("LAVTA");
61+
lavta.projectId = project.id;
62+
Persistence.feedSources.create(lavta);
63+
lavtaVersion1 = createFeedVersionFromGtfsZip(lavta, "lavta-cal-attributes.zip");
5864
}
5965

6066
@AfterAll
@@ -138,4 +144,25 @@ void canBuildRouteSubcategoryToCategoryMap() {
138144
)
139145
), equalTo("3"));
140146
}
147+
148+
@Test
149+
void shouldReportEmptyRows() throws Exception {
150+
// An empty row should be reported as such (separately from a row with incorrect number of columns).
151+
152+
LOG.info("Validating GTFS+ with an empty row");
153+
GtfsPlusValidation validation = GtfsPlusValidation.validate(lavtaVersion1.id);
154+
// Expect one GTFS+ issue of type "empty row".
155+
assertThat(
156+
"Should have one GTFS+ validation issue on the calendar_attributes table",
157+
validation.issues.size(), equalTo(1)
158+
);
159+
assertThat(
160+
"Should have the validation issue on the calendar_attributes table",
161+
validation.issues.get(0).tableId, equalTo("calendar_attributes")
162+
);
163+
assertThat(
164+
"Should have the GTFS+ 'empty row' validation issue on the calendar_attributes table",
165+
validation.issues.get(0).description, equalTo("1 row(s) are empty. (File may need to be edited manually.)")
166+
);
167+
}
141168
}
Binary file not shown.

0 commit comments

Comments
 (0)