@@ -45,9 +45,10 @@ private enum Mode {
45
45
}
46
46
47
47
private final Deque <Mode > modes = new ArrayDeque <>();
48
- private int input = 0 ;
49
48
private State state = State .INITIAL ;
49
+ private int previousInput = 0 ;
50
50
private State previous ;
51
+ private boolean dataEventNotified = false ;
51
52
52
53
private interface Notifier {
53
54
boolean execute (State state , int start , int length );
@@ -117,6 +118,10 @@ public Lexer(InputStream stream, Charset charset, EventHandler handler, StaEDISt
117
118
bn = (notifyState , start , length ) -> handler .binaryData (binaryStream );
118
119
119
120
en = (notifyState , start , length ) -> {
121
+ if (dataEventNotified ) {
122
+ dataEventNotified = false ;
123
+ return false ;
124
+ }
120
125
elementHolder .set (buffer .array (), start , length );
121
126
return handler .elementData (elementHolder , true );
122
127
};
@@ -127,6 +132,10 @@ public Dialect getDialect() {
127
132
return dialect ;
128
133
}
129
134
135
+ public CharacterSet getCharacterSet () {
136
+ return characters ;
137
+ }
138
+
130
139
public void invalidate () {
131
140
if (state != State .INVALID ) {
132
141
previous = state ;
@@ -171,141 +180,182 @@ public boolean hasRemaining() throws IOException {
171
180
172
181
public void parse () throws IOException , EDIException {
173
182
try {
174
- parse (this ::readCharacterUnchecked );
183
+ parse (this ::readCharacterUnchecked , false );
175
184
} catch (UncheckedIOException e ) {
176
185
throw e .getCause ();
177
186
}
178
187
}
179
188
180
- void parse (IntSupplier inputSource ) throws EDIException {
189
+ public void parse (CharBuffer buffer ) throws EDIException {
190
+ IntSupplier inputSource = () -> buffer .hasRemaining () ? buffer .get () : -1 ;
191
+ parse (inputSource , true );
192
+ }
193
+
194
+ public void signalElementDataCompleteEvent (int delimiter ) throws EDIException {
195
+ CharacterClass clazz = characters .getClass (delimiter );
196
+ State dataCompleteState = State .transition (state , dialect , clazz );
197
+
198
+ switch (dataCompleteState ) {
199
+ case COMPONENT_END :
200
+ handleComponent ();
201
+ nextEvent ();
202
+ dataEventNotified = true ;
203
+ break ;
204
+ case ELEMENT_END :
205
+ case TRAILER_ELEMENT_END :
206
+ handleElement ();
207
+ nextEvent ();
208
+ dataEventNotified = true ;
209
+ break ;
210
+ default :
211
+ throw new IllegalStateException (dataCompleteState .toString ());
212
+ }
213
+ }
214
+
215
+ void parse (IntSupplier inputSource , boolean allowPartialStream ) throws EDIException {
181
216
if (nextEvent ()) {
182
217
return ;
183
218
}
184
219
185
220
if (state == State .INVALID ) {
186
221
// Unable to proceed once the state becomes invalid
187
- throw invalidStateError ();
222
+ throw invalidStateError (previousInput );
188
223
}
189
224
225
+ int input = 0 ;
190
226
boolean eventsReady = false ;
191
227
192
228
while (!eventsReady && (input = inputSource .getAsInt ()) > -1 ) {
193
- location .incrementOffset (input );
229
+ eventsReady = processInputCharacter (input );
230
+ }
194
231
195
- CharacterClass clazz = characters .getClass (input );
196
- previous = state ;
197
- state = State .transition (state , dialect , clazz );
198
- LOGGER .finer (() -> String .format ("%s + (%s, '%s', %s) -> %s" , previous , Dialect .getStandard (dialect ), (char ) input , clazz , state ));
199
-
200
- switch (state ) {
201
- case INITIAL :
202
- case TAG_SEARCH :
203
- case HEADER_EDIFACT_UNB_SEARCH :
204
- break ;
205
- case HEADER_X12_I :
206
- case HEADER_X12_S :
207
- case HEADER_EDIFACT_N :
208
- case HEADER_EDIFACT_U :
209
- case HEADER_TRADACOMS_S :
210
- case HEADER_TRADACOMS_T :
211
- case TAG_1 :
212
- case TAG_2 :
213
- case TAG_3 :
214
- case TRAILER_X12_I :
215
- case TRAILER_X12_E :
216
- case TRAILER_X12_A :
217
- case TRAILER_EDIFACT_U :
218
- case TRAILER_EDIFACT_N :
219
- case TRAILER_EDIFACT_Z :
220
- case TRAILER_TRADACOMS_E :
221
- case TRAILER_TRADACOMS_N :
222
- case TRAILER_TRADACOMS_D :
223
- case ELEMENT_DATA :
224
- case TRAILER_ELEMENT_DATA :
232
+ if (input < 0 && !allowPartialStream ) {
233
+ throw error (EDIException .INCOMPLETE_STREAM );
234
+ }
235
+ }
236
+
237
+ boolean processInputCharacter (int input ) throws EDIException {
238
+ boolean eventsReady = false ;
239
+ location .incrementOffset (input );
240
+
241
+ CharacterClass clazz = characters .getClass (input );
242
+ previous = state ;
243
+ previousInput = input ;
244
+
245
+ state = State .transition (state , dialect , clazz );
246
+ LOGGER .finer (() -> String .format ("%s + (%s, '%s', %s) -> %s" , previous , Dialect .getStandard (dialect ), (char ) input , clazz , state ));
247
+
248
+ switch (state ) {
249
+ case INITIAL :
250
+ case TAG_SEARCH :
251
+ case HEADER_EDIFACT_UNB_SEARCH :
252
+ break ;
253
+ case HEADER_X12_I :
254
+ case HEADER_X12_S :
255
+ case HEADER_EDIFACT_N :
256
+ case HEADER_EDIFACT_U :
257
+ case HEADER_TRADACOMS_S :
258
+ case HEADER_TRADACOMS_T :
259
+ case TAG_1 :
260
+ case TAG_2 :
261
+ case TAG_3 :
262
+ case TRAILER_X12_I :
263
+ case TRAILER_X12_E :
264
+ case TRAILER_X12_A :
265
+ case TRAILER_EDIFACT_U :
266
+ case TRAILER_EDIFACT_N :
267
+ case TRAILER_EDIFACT_Z :
268
+ case TRAILER_TRADACOMS_E :
269
+ case TRAILER_TRADACOMS_N :
270
+ case TRAILER_TRADACOMS_D :
271
+ case ELEMENT_DATA :
272
+ case TRAILER_ELEMENT_DATA :
273
+ buffer .put ((char ) input );
274
+ break ;
275
+ case ELEMENT_INVALID_DATA :
276
+ if (!characters .isIgnored (input )) {
225
277
buffer .put ((char ) input );
226
- break ;
227
- case ELEMENT_INVALID_DATA :
228
- if (! characters . isIgnored ( input )) {
229
- buffer . put (( char ) input );
230
- }
231
- break ;
232
- case HEADER_EDIFACT_UNB_1 : // U - When UNA is present
233
- case HEADER_EDIFACT_UNB_2 : // N - When UNA is present
234
- case HEADER_EDIFACT_UNB_3 : // B - When UNA is present
235
- handleStateHeaderTag ( input );
236
- break ;
237
- case HEADER_RELEASE :
238
- case DATA_RELEASE :
239
- // Skip this character - next character will be literal value
240
- break ;
241
- case ELEMENT_DATA_BINARY :
242
- handleStateElementDataBinary ( );
243
- break ;
244
- case INTERCHANGE_CANDIDATE :
245
- // ISA, UNA, or UNB was found
246
- handleStateInterchangeCandidate (input );
247
- break ;
248
- case HEADER_DATA :
249
- case HEADER_INVALID_DATA :
250
- handleStateHeaderData ( input );
251
- eventsReady = dialectConfirmed ( State . TAG_SEARCH );
252
- break ;
253
- case HEADER_SEGMENT_BEGIN :
254
- dialect . appendHeader ( characters , ( char ) input );
255
- openSegment ( );
256
- eventsReady = dialectConfirmed ( State . ELEMENT_END );
257
- break ;
258
- case HEADER_ELEMENT_END :
259
- dialect . appendHeader ( characters , ( char ) input );
260
- handleElement ( );
261
- eventsReady = dialectConfirmed ( State . ELEMENT_END );
262
- break ;
263
- case HEADER_COMPONENT_END :
264
- dialect . appendHeader ( characters , ( char ) input );
265
- handleComponent ( );
266
- eventsReady = dialectConfirmed ( State . COMPONENT_END );
267
- break ;
268
- case SEGMENT_BEGIN :
269
- case TRAILER_BEGIN :
270
- openSegment ();
271
- eventsReady = nextEvent ();
272
- break ;
273
- case SEGMENT_END :
274
- closeSegment ();
275
- eventsReady = nextEvent ();
276
- break ;
277
- case SEGMENT_EMPTY :
278
- emptySegment ();
279
- eventsReady = nextEvent ();
280
- break ;
281
- case COMPONENT_END :
282
- handleComponent ();
283
- eventsReady = nextEvent ();
284
- break ;
285
- case ELEMENT_END :
286
- case TRAILER_ELEMENT_END :
287
- case ELEMENT_REPEAT :
288
- handleElement ();
289
- eventsReady = nextEvent ();
290
- break ;
291
- case INTERCHANGE_END :
292
- closeInterchange ();
293
- eventsReady = nextEvent ();
294
- break ;
295
- default :
296
- if ( characters . isIgnored ( input )) {
297
- state = previous ;
298
- } else if ( clazz != CharacterClass . INVALID ) {
299
- throw invalidStateError ();
300
- } else {
301
- throw error ( EDIException . INVALID_CHARACTER );
302
- }
278
+ }
279
+ break ;
280
+ case HEADER_EDIFACT_UNB_1 : // U - When UNA is present
281
+ case HEADER_EDIFACT_UNB_2 : // N - When UNA is present
282
+ case HEADER_EDIFACT_UNB_3 : // B - When UNA is present
283
+ handleStateHeaderTag ( input ) ;
284
+ break ;
285
+ case HEADER_RELEASE :
286
+ case DATA_RELEASE :
287
+ // Skip this character - next character will be literal value
288
+ break ;
289
+ case ELEMENT_DATA_BINARY :
290
+ handleStateElementDataBinary ();
291
+ break ;
292
+ case INTERCHANGE_CANDIDATE :
293
+ // ISA, UNA, or UNB was found
294
+ handleStateInterchangeCandidate ( input );
295
+ break ;
296
+ case HEADER_DATA :
297
+ case HEADER_INVALID_DATA :
298
+ handleStateHeaderData (input );
299
+ eventsReady = dialectConfirmed ( State . TAG_SEARCH ) ;
300
+ break ;
301
+ case HEADER_SEGMENT_BEGIN :
302
+ dialect . appendHeader ( characters , ( char ) input );
303
+ openSegment ( );
304
+ eventsReady = dialectConfirmed ( State . ELEMENT_END ) ;
305
+ break ;
306
+ case HEADER_SEGMENT_END :
307
+ dialect . appendHeader ( characters , ( char ) input );
308
+ closeSegment ( );
309
+ eventsReady = dialectConfirmed ( State . SEGMENT_END ) ;
310
+ break ;
311
+ case HEADER_ELEMENT_END :
312
+ dialect . appendHeader ( characters , ( char ) input );
313
+ handleElement ( );
314
+ eventsReady = dialectConfirmed ( State . ELEMENT_END ) ;
315
+ break ;
316
+ case HEADER_COMPONENT_END :
317
+ dialect . appendHeader ( characters , ( char ) input );
318
+ handleComponent ( );
319
+ eventsReady = dialectConfirmed ( State . COMPONENT_END ) ;
320
+ break ;
321
+ case SEGMENT_BEGIN :
322
+ case TRAILER_BEGIN :
323
+ openSegment ();
324
+ eventsReady = nextEvent () ;
325
+ break ;
326
+ case SEGMENT_END :
327
+ closeSegment ();
328
+ eventsReady = nextEvent () ;
329
+ break ;
330
+ case SEGMENT_EMPTY :
331
+ emptySegment ();
332
+ eventsReady = nextEvent () ;
333
+ break ;
334
+ case COMPONENT_END :
335
+ handleComponent ();
336
+ eventsReady = nextEvent () ;
337
+ break ;
338
+ case ELEMENT_END :
339
+ case TRAILER_ELEMENT_END :
340
+ case ELEMENT_REPEAT :
341
+ handleElement ();
342
+ eventsReady = nextEvent () ;
343
+ break ;
344
+ case INTERCHANGE_END :
345
+ closeInterchange ();
346
+ eventsReady = nextEvent () ;
347
+ break ;
348
+ default :
349
+ if ( characters . isIgnored ( input )) {
350
+ state = previous ;
351
+ } else if ( clazz != CharacterClass . INVALID ) {
352
+ throw invalidStateError ( input );
353
+ } else {
354
+ throw error ( EDIException . INVALID_CHARACTER );
303
355
}
304
356
}
305
357
306
- if (input < 0 ) {
307
- throw error (EDIException .INCOMPLETE_STREAM );
308
- }
358
+ return eventsReady ;
309
359
}
310
360
311
361
int readCharacterUnchecked () {
@@ -445,7 +495,7 @@ private boolean dialectConfirmed(State confirmed) throws EDIException {
445
495
return false ;
446
496
}
447
497
448
- private EDIException invalidStateError () {
498
+ private EDIException invalidStateError (int input ) {
449
499
StringBuilder message = new StringBuilder ();
450
500
message .append (state );
451
501
message .append (" (previous: " );
@@ -476,10 +526,10 @@ private boolean nextEvent() {
476
526
int start = startQueue .remove ();
477
527
int length = lengthQueue .remove ();
478
528
eventsReady = event .execute (nextState , start , length );
479
- }
480
529
481
- if (events .isEmpty ()) {
482
- buffer .clear ();
530
+ if (events .isEmpty ()) {
531
+ buffer .clear ();
532
+ }
483
533
}
484
534
485
535
return eventsReady ;
0 commit comments