Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit be0547a

Browse files
committed Apr 30, 2023
[WIP] Push writer output through lexer and proxy handler for validation
Fixes xlate#283 Fixes xlate#286 Signed-off-by: Michael Edgar <[email protected]>
1 parent a313ef2 commit be0547a

15 files changed: +983 -608 lines changed
 

‎src/main/java/io/xlate/edi/internal/stream/StaEDIOutputFactory.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,7 @@ public StaEDIOutputFactory() {
4141
supportedProperties.add(PRETTY_PRINT);
4242
supportedProperties.add(TRUNCATE_EMPTY_ELEMENTS);
4343
supportedProperties.add(FORMAT_ELEMENTS);
44+
supportedProperties.add(EDI_VALIDATE_CONTROL_STRUCTURE);
4445

4546
properties.put(PRETTY_PRINT, Boolean.FALSE);
4647
}

‎src/main/java/io/xlate/edi/internal/stream/StaEDIStreamWriter.java

Lines changed: 589 additions & 422 deletions
Large diffs are not rendered by default.

‎src/main/java/io/xlate/edi/internal/stream/tokenization/EDIException.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,12 @@
1515
******************************************************************************/
1616
package io.xlate.edi.internal.stream.tokenization;
1717

18-
import io.xlate.edi.stream.EDIStreamException;
19-
import io.xlate.edi.stream.Location;
20-
2118
import java.util.HashMap;
2219
import java.util.Map;
2320

21+
import io.xlate.edi.stream.EDIStreamException;
22+
import io.xlate.edi.stream.Location;
23+
2424
public class EDIException extends EDIStreamException {
2525

2626
private static final long serialVersionUID = -2724168743697298348L;
@@ -41,7 +41,7 @@ public class EDIException extends EDIStreamException {
4141
exceptionMessages.put(INVALID_STATE,
4242
"EDIE003 - Invalid processing state");
4343
exceptionMessages.put(INVALID_CHARACTER,
44-
"EDIE004 - Invalid input character");
44+
"EDIE004 - Invalid character");
4545
exceptionMessages.put(INCOMPLETE_STREAM,
4646
"EDIE005 - Unexpected end of stream");
4747
}
@@ -55,7 +55,7 @@ public EDIException(String message) {
5555
}
5656

5757
EDIException(Integer id, String message, Location location) {
58-
super(exceptionMessages.get(id) + "; " + message, location);
58+
super(buildMessage(exceptionMessages.get(id), location) + "; " + message, location);
5959
}
6060

6161
public EDIException(Integer id, String message) {

‎src/main/java/io/xlate/edi/internal/stream/tokenization/Lexer.java

Lines changed: 169 additions & 119 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,10 @@ private enum Mode {
4545
}
4646

4747
private final Deque<Mode> modes = new ArrayDeque<>();
48-
private int input = 0;
4948
private State state = State.INITIAL;
49+
private int previousInput = 0;
5050
private State previous;
51+
private boolean dataEventNotified = false;
5152

5253
private interface Notifier {
5354
boolean execute(State state, int start, int length);
@@ -117,6 +118,10 @@ public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDISt
117118
bn = (notifyState, start, length) -> handler.binaryData(binaryStream);
118119

119120
en = (notifyState, start, length) -> {
121+
if (dataEventNotified) {
122+
dataEventNotified = false;
123+
return false;
124+
}
120125
elementHolder.set(buffer.array(), start, length);
121126
return handler.elementData(elementHolder, true);
122127
};
@@ -127,6 +132,10 @@ public Dialect getDialect() {
127132
return dialect;
128133
}
129134

135+
public CharacterSet getCharacterSet() {
136+
return characters;
137+
}
138+
130139
public void invalidate() {
131140
if (state != State.INVALID) {
132141
previous = state;
@@ -171,141 +180,182 @@ public boolean hasRemaining() throws IOException {
171180

172181
public void parse() throws IOException, EDIException {
173182
try {
174-
parse(this::readCharacterUnchecked);
183+
parse(this::readCharacterUnchecked, false);
175184
} catch (UncheckedIOException e) {
176185
throw e.getCause();
177186
}
178187
}
179188

180-
void parse(IntSupplier inputSource) throws EDIException {
189+
public void parse(CharBuffer buffer) throws EDIException {
190+
IntSupplier inputSource = () -> buffer.hasRemaining() ? buffer.get() : -1;
191+
parse(inputSource, true);
192+
}
193+
194+
public void signalElementDataCompleteEvent(int delimiter) throws EDIException {
195+
CharacterClass clazz = characters.getClass(delimiter);
196+
State dataCompleteState = State.transition(state, dialect, clazz);
197+
198+
switch (dataCompleteState) {
199+
case COMPONENT_END:
200+
handleComponent();
201+
nextEvent();
202+
dataEventNotified = true;
203+
break;
204+
case ELEMENT_END:
205+
case TRAILER_ELEMENT_END:
206+
handleElement();
207+
nextEvent();
208+
dataEventNotified = true;
209+
break;
210+
default:
211+
throw new IllegalStateException(dataCompleteState.toString());
212+
}
213+
}
214+
215+
void parse(IntSupplier inputSource, boolean allowPartialStream) throws EDIException {
181216
if (nextEvent()) {
182217
return;
183218
}
184219

185220
if (state == State.INVALID) {
186221
// Unable to proceed once the state becomes invalid
187-
throw invalidStateError();
222+
throw invalidStateError(previousInput);
188223
}
189224

225+
int input = 0;
190226
boolean eventsReady = false;
191227

192228
while (!eventsReady && (input = inputSource.getAsInt()) > -1) {
193-
location.incrementOffset(input);
229+
eventsReady = processInputCharacter(input);
230+
}
194231

195-
CharacterClass clazz = characters.getClass(input);
196-
previous = state;
197-
state = State.transition(state, dialect, clazz);
198-
LOGGER.finer(() -> String.format("%s + (%s, '%s', %s) -> %s", previous, Dialect.getStandard(dialect), (char) input, clazz, state));
199-
200-
switch (state) {
201-
case INITIAL:
202-
case TAG_SEARCH:
203-
case HEADER_EDIFACT_UNB_SEARCH:
204-
break;
205-
case HEADER_X12_I:
206-
case HEADER_X12_S:
207-
case HEADER_EDIFACT_N:
208-
case HEADER_EDIFACT_U:
209-
case HEADER_TRADACOMS_S:
210-
case HEADER_TRADACOMS_T:
211-
case TAG_1:
212-
case TAG_2:
213-
case TAG_3:
214-
case TRAILER_X12_I:
215-
case TRAILER_X12_E:
216-
case TRAILER_X12_A:
217-
case TRAILER_EDIFACT_U:
218-
case TRAILER_EDIFACT_N:
219-
case TRAILER_EDIFACT_Z:
220-
case TRAILER_TRADACOMS_E:
221-
case TRAILER_TRADACOMS_N:
222-
case TRAILER_TRADACOMS_D:
223-
case ELEMENT_DATA:
224-
case TRAILER_ELEMENT_DATA:
232+
if (input < 0 && !allowPartialStream) {
233+
throw error(EDIException.INCOMPLETE_STREAM);
234+
}
235+
}
236+
237+
boolean processInputCharacter(int input) throws EDIException {
238+
boolean eventsReady = false;
239+
location.incrementOffset(input);
240+
241+
CharacterClass clazz = characters.getClass(input);
242+
previous = state;
243+
previousInput = input;
244+
245+
state = State.transition(state, dialect, clazz);
246+
LOGGER.finer(() -> String.format("%s + (%s, '%s', %s) -> %s", previous, Dialect.getStandard(dialect), (char) input, clazz, state));
247+
248+
switch (state) {
249+
case INITIAL:
250+
case TAG_SEARCH:
251+
case HEADER_EDIFACT_UNB_SEARCH:
252+
break;
253+
case HEADER_X12_I:
254+
case HEADER_X12_S:
255+
case HEADER_EDIFACT_N:
256+
case HEADER_EDIFACT_U:
257+
case HEADER_TRADACOMS_S:
258+
case HEADER_TRADACOMS_T:
259+
case TAG_1:
260+
case TAG_2:
261+
case TAG_3:
262+
case TRAILER_X12_I:
263+
case TRAILER_X12_E:
264+
case TRAILER_X12_A:
265+
case TRAILER_EDIFACT_U:
266+
case TRAILER_EDIFACT_N:
267+
case TRAILER_EDIFACT_Z:
268+
case TRAILER_TRADACOMS_E:
269+
case TRAILER_TRADACOMS_N:
270+
case TRAILER_TRADACOMS_D:
271+
case ELEMENT_DATA:
272+
case TRAILER_ELEMENT_DATA:
273+
buffer.put((char) input);
274+
break;
275+
case ELEMENT_INVALID_DATA:
276+
if (!characters.isIgnored(input)) {
225277
buffer.put((char) input);
226-
break;
227-
case ELEMENT_INVALID_DATA:
228-
if (!characters.isIgnored(input)) {
229-
buffer.put((char) input);
230-
}
231-
break;
232-
case HEADER_EDIFACT_UNB_1: // U - When UNA is present
233-
case HEADER_EDIFACT_UNB_2: // N - When UNA is present
234-
case HEADER_EDIFACT_UNB_3: // B - When UNA is present
235-
handleStateHeaderTag(input);
236-
break;
237-
case HEADER_RELEASE:
238-
case DATA_RELEASE:
239-
// Skip this character - next character will be literal value
240-
break;
241-
case ELEMENT_DATA_BINARY:
242-
handleStateElementDataBinary();
243-
break;
244-
case INTERCHANGE_CANDIDATE:
245-
// ISA, UNA, or UNB was found
246-
handleStateInterchangeCandidate(input);
247-
break;
248-
case HEADER_DATA:
249-
case HEADER_INVALID_DATA:
250-
handleStateHeaderData(input);
251-
eventsReady = dialectConfirmed(State.TAG_SEARCH);
252-
break;
253-
case HEADER_SEGMENT_BEGIN:
254-
dialect.appendHeader(characters, (char) input);
255-
openSegment();
256-
eventsReady = dialectConfirmed(State.ELEMENT_END);
257-
break;
258-
case HEADER_ELEMENT_END:
259-
dialect.appendHeader(characters, (char) input);
260-
handleElement();
261-
eventsReady = dialectConfirmed(State.ELEMENT_END);
262-
break;
263-
case HEADER_COMPONENT_END:
264-
dialect.appendHeader(characters, (char) input);
265-
handleComponent();
266-
eventsReady = dialectConfirmed(State.COMPONENT_END);
267-
break;
268-
case SEGMENT_BEGIN:
269-
case TRAILER_BEGIN:
270-
openSegment();
271-
eventsReady = nextEvent();
272-
break;
273-
case SEGMENT_END:
274-
closeSegment();
275-
eventsReady = nextEvent();
276-
break;
277-
case SEGMENT_EMPTY:
278-
emptySegment();
279-
eventsReady = nextEvent();
280-
break;
281-
case COMPONENT_END:
282-
handleComponent();
283-
eventsReady = nextEvent();
284-
break;
285-
case ELEMENT_END:
286-
case TRAILER_ELEMENT_END:
287-
case ELEMENT_REPEAT:
288-
handleElement();
289-
eventsReady = nextEvent();
290-
break;
291-
case INTERCHANGE_END:
292-
closeInterchange();
293-
eventsReady = nextEvent();
294-
break;
295-
default:
296-
if (characters.isIgnored(input)) {
297-
state = previous;
298-
} else if (clazz != CharacterClass.INVALID) {
299-
throw invalidStateError();
300-
} else {
301-
throw error(EDIException.INVALID_CHARACTER);
302-
}
278+
}
279+
break;
280+
case HEADER_EDIFACT_UNB_1: // U - When UNA is present
281+
case HEADER_EDIFACT_UNB_2: // N - When UNA is present
282+
case HEADER_EDIFACT_UNB_3: // B - When UNA is present
283+
handleStateHeaderTag(input);
284+
break;
285+
case HEADER_RELEASE:
286+
case DATA_RELEASE:
287+
// Skip this character - next character will be literal value
288+
break;
289+
case ELEMENT_DATA_BINARY:
290+
handleStateElementDataBinary();
291+
break;
292+
case INTERCHANGE_CANDIDATE:
293+
// ISA, UNA, or UNB was found
294+
handleStateInterchangeCandidate(input);
295+
break;
296+
case HEADER_DATA:
297+
case HEADER_INVALID_DATA:
298+
handleStateHeaderData(input);
299+
eventsReady = dialectConfirmed(State.TAG_SEARCH);
300+
break;
301+
case HEADER_SEGMENT_BEGIN:
302+
dialect.appendHeader(characters, (char) input);
303+
openSegment();
304+
eventsReady = dialectConfirmed(State.ELEMENT_END);
305+
break;
306+
case HEADER_SEGMENT_END:
307+
dialect.appendHeader(characters, (char) input);
308+
closeSegment();
309+
eventsReady = dialectConfirmed(State.SEGMENT_END);
310+
break;
311+
case HEADER_ELEMENT_END:
312+
dialect.appendHeader(characters, (char) input);
313+
handleElement();
314+
eventsReady = dialectConfirmed(State.ELEMENT_END);
315+
break;
316+
case HEADER_COMPONENT_END:
317+
dialect.appendHeader(characters, (char) input);
318+
handleComponent();
319+
eventsReady = dialectConfirmed(State.COMPONENT_END);
320+
break;
321+
case SEGMENT_BEGIN:
322+
case TRAILER_BEGIN:
323+
openSegment();
324+
eventsReady = nextEvent();
325+
break;
326+
case SEGMENT_END:
327+
closeSegment();
328+
eventsReady = nextEvent();
329+
break;
330+
case SEGMENT_EMPTY:
331+
emptySegment();
332+
eventsReady = nextEvent();
333+
break;
334+
case COMPONENT_END:
335+
handleComponent();
336+
eventsReady = nextEvent();
337+
break;
338+
case ELEMENT_END:
339+
case TRAILER_ELEMENT_END:
340+
case ELEMENT_REPEAT:
341+
handleElement();
342+
eventsReady = nextEvent();
343+
break;
344+
case INTERCHANGE_END:
345+
closeInterchange();
346+
eventsReady = nextEvent();
347+
break;
348+
default:
349+
if (characters.isIgnored(input)) {
350+
state = previous;
351+
} else if (clazz != CharacterClass.INVALID) {
352+
throw invalidStateError(input);
353+
} else {
354+
throw error(EDIException.INVALID_CHARACTER);
303355
}
304356
}
305357

306-
if (input < 0) {
307-
throw error(EDIException.INCOMPLETE_STREAM);
308-
}
358+
return eventsReady;
309359
}
310360

311361
int readCharacterUnchecked() {
@@ -445,7 +495,7 @@ private boolean dialectConfirmed(State confirmed) throws EDIException {
445495
return false;
446496
}
447497

448-
private EDIException invalidStateError() {
498+
private EDIException invalidStateError(int input) {
449499
StringBuilder message = new StringBuilder();
450500
message.append(state);
451501
message.append(" (previous: ");
@@ -476,10 +526,10 @@ private boolean nextEvent() {
476526
int start = startQueue.remove();
477527
int length = lengthQueue.remove();
478528
eventsReady = event.execute(nextState, start, length);
479-
}
480529

481-
if (events.isEmpty()) {
482-
buffer.clear();
530+
if (events.isEmpty()) {
531+
buffer.clear();
532+
}
483533
}
484534

485535
return eventsReady;

0 commit comments

Comments
 (0)