Fix bug in EDITokenizer that causes incorrect results in some cases when processing multiple interchanges within a single input source.
BerryWorksSoftware · Apr 27, 2015 · be1d14a · be1d14a
1 parent da8896b
commit be1d14a
Showing 1 changed file with 160 additions and 181 deletions.
diff --git a/edireader/src/main/java/com/berryworks/edireader/tokenizer/EDITokenizer.java b/edireader/src/main/java/com/berryworks/edireader/tokenizer/EDITokenizer.java
@@ -35,204 +35,183 @@
  * past the next token and return a <code>Token</code> instance describing that token.
  * <p/>
  */
-public class EDITokenizer extends AbstractTokenizer
-{
-
-  public static final int BUFFER_SIZE = 1000;
-  private final CharBuffer charBuffer = CharBuffer.wrap(new char[BUFFER_SIZE]);
-
-  public EDITokenizer(Reader source)
-  {
-    super(source);
-    charBuffer.flip();
-    if (EDIReader.debug)
-      trace("Constructed a new EDITokenizer");
-  }
-
-  public EDITokenizer(Reader source, char[] preRead)
-  {
-    this(source);
-    if (preRead == null || preRead.length == 0)
-      return;
-
-    if (preRead.length > charBuffer.capacity())
-      throw new RuntimeException("Attempt to create EDITokenizer with " + preRead.length +
-              " pre-read chars, which is greater than the internal buffer size of " + charBuffer.capacity());
-    charBuffer.clear();
-    charBuffer.put(preRead);
-    charBuffer.flip();
-  }
-
-  /**
-   * Returns a String representation of the current state of the tokenizer
-   * for testing and debugging purposes.
-   *
-   * @return String representation
-   */
-  @Override
-  public String toString()
-  {
-    String result = "tokenizer state:";
-    result += " segmentCount=" + segmentCount;
-    result += " charCount=" + charCount;
-    result += " segTokenCount=" + segTokenCount;
-    result += " segCharCount=" + segCharCount;
-    result += " currentToken=" + currentToken;
-    result += " buffer.limit=" + charBuffer.limit();
-    result += " buffer.position=" + charBuffer.position();
-    return result;
-  }
-
-  /**
-   * Gets the next character of input. <pr>Sets cChar, cClass
-   *
-   * @throws java.io.IOException for problem reading EDI data
-   */
-  public void getChar() throws IOException
-  {
-    if (unGot)
-    {
-      // The current character has been "put back" with ungetChar()
-      // after having been seen with getChar(). Therefore, this call
-      // to getChar() can simply reget the current character.
-      unGot = false;
-      charCount++;
-      segCharCount++;
-      return;
-    }
+public class EDITokenizer extends AbstractTokenizer {
+
+    public static final int BUFFER_SIZE = 1000;
+    private final CharBuffer charBuffer = CharBuffer.wrap(new char[BUFFER_SIZE]);
 
-    // Read a fresh character from the input source.
-    // But first copy the current one to an outputWriter
-    // or the recorder if necessary.
-    if (outputWriter != null)
-    {
-      // We do have an outputWriter wanting data, but do we have
-      // a current character to write? And make sure writing is
-      // not suspended.
-      if ((!endOfFile) && (!writingSuspended))
-        outputWriter.write(cChar);
+    public EDITokenizer(Reader source) {
+        super(source);
+        charBuffer.flip();
+        if (EDIReader.debug)
+            trace("Constructed a new EDITokenizer");
     }
-    if (recorderOn)
-      recording.append(cChar);
 
-    if (charBuffer.remaining() == 0)
-    {
-      readUntilBufferProvidesAtLeast(1);
+    public EDITokenizer(Reader source, char[] preRead) {
+        this(source);
+        if (preRead == null || preRead.length == 0)
+            return;
+
+        if (preRead.length > charBuffer.capacity())
+            throw new RuntimeException("Attempt to create EDITokenizer with " + preRead.length +
+                    " pre-read chars, which is greater than the internal buffer size of " + charBuffer.capacity());
+        charBuffer.clear();
+        charBuffer.put(preRead);
+        charBuffer.flip();
     }
 
-    if (endOfFile)
-    {
-      cClass = CharacterClass.EOF;
-      if (EDIReader.debug)
-        trace("end-of-file encountered");
+    /**
+     * Returns a String representation of the current state of the tokenizer
+     * for testing and debugging purposes.
+     *
+     * @return String representation
+     */
+    @Override
+    public String toString() {
+        String result = "tokenizer state:";
+        result += " segmentCount=" + segmentCount;
+        result += " charCount=" + charCount;
+        result += " segTokenCount=" + segTokenCount;
+        result += " segCharCount=" + segCharCount;
+        result += " currentToken=" + currentToken;
+        result += " buffer.limit=" + charBuffer.limit();
+        result += " buffer.position=" + charBuffer.position();
+        return result;
     }
-    else
-    {
-      cChar = charBuffer.get();
-      if (cChar == delimiter)
-        cClass = CharacterClass.DELIMITER;
-      else if (cChar == subDelimiter)
-        cClass = CharacterClass.SUB_DELIMITER;
-      else if (cChar == release)
-        cClass = CharacterClass.RELEASE;
-      else if (cChar == terminator)
-        cClass = CharacterClass.TERMINATOR;
-      else if (cChar == repetitionSeparator)
-        cClass = CharacterClass.REPEAT_DELIMITER;
-      else
-        cClass = CharacterClass.DATA;
+
+    /**
+     * Gets the next character of input. <pr>Sets cChar, cClass
+     *
+     * @throws java.io.IOException for problem reading EDI data
+     */
+    public void getChar() throws IOException {
+        if (unGot) {
+            // The current character has been "put back" with ungetChar()
+            // after having been seen with getChar(). Therefore, this call
+            // to getChar() can simply reget the current character.
+            unGot = false;
+            charCount++;
+            segCharCount++;
+            return;
+        }
+
+        // Read a fresh character from the input source.
+        // But first copy the current one to an outputWriter
+        // or the recorder if necessary.
+        if (outputWriter != null) {
+            // We do have an outputWriter wanting data, but do we have
+            // a current character to write? And make sure writing is
+            // not suspended.
+            if ((!endOfFile) && (!writingSuspended))
+                outputWriter.write(cChar);
+        }
+        if (recorderOn)
+            recording.append(cChar);
+
+        if (charBuffer.remaining() == 0) {
+            readUntilBufferProvidesAtLeast(1);
+        }
+
+        if (endOfFile) {
+            cClass = CharacterClass.EOF;
+            if (EDIReader.debug)
+                trace("end-of-file encountered");
+        } else {
+            cChar = charBuffer.get();
+            if (cChar == delimiter)
+                cClass = CharacterClass.DELIMITER;
+            else if (cChar == subDelimiter)
+                cClass = CharacterClass.SUB_DELIMITER;
+            else if (cChar == release)
+                cClass = CharacterClass.RELEASE;
+            else if (cChar == terminator)
+                cClass = CharacterClass.TERMINATOR;
+            else if (cChar == repetitionSeparator)
+                cClass = CharacterClass.REPEAT_DELIMITER;
+            else
+                cClass = CharacterClass.DATA;
+        }
+        charCount++;
+        segCharCount++;
     }
-    charCount++;
-    segCharCount++;
-  }
 
-  public char[] getBuffered()
-  {
-    char[] result = new char[0];
+    public char[] getBuffered() {
+        char[] result = new char[0];
 
-    if (endOfFile)
-      return result;
+        if (endOfFile)
+            return result;
 
-    if (charBuffer.remaining() == 0 && !unGot) {
-      return result;
-    }
+        if (charBuffer.remaining() == 0 && !unGot) {
+            return result;
+        }
+
+        try {
+            result = lookahead(charBuffer.remaining() + (unGot ? 1 : 0));
+        } catch (Exception ignore) {
+        }
 
-    try {
-      result = lookahead(charBuffer.remaining() + (unGot ? 1 : 0));
-    } catch (Exception ignore) {
+        return result;
     }
 
-    return result;
-  }
-
-  /**
-   * Look ahead into the source of input chars and return the next n chars to
-   * be seen, without disturbing the normal operation of getChar().
-   *
-   * @param n number of chars to return
-   * @return char[] containing upcoming input chars
-   * @throws java.io.IOException for problem reading EDI data
-   * @throws com.berryworks.edireader.EDISyntaxException
-   *
-   */
-  public char[] lookahead(int n) throws IOException, EDISyntaxException
-  {
-    if (EDIReader.debug)
-      trace("EDITokenizer.lookahead(" + n + ")");
-
-    char[] rval = new char[n];
-
-    // The 1st char is grabbed using the tokenizer's built-in
-    // getChar() / ungetChar() mechanism. This allows things to work
-    // properly whether or not the next char has already been gotten.
-    getChar();
-    rval[0] = cChar;
-    ungetChar();
-
-    // The minus 1 is because we have already filled the first char of the return value, so we only need n-1 more
-    if (charBuffer.remaining() < n - 1)
-    {
-      if (EDIReader.debug)
+    /**
+     * Look ahead into the source of input chars and return the next n chars to
+     * be seen, without disturbing the normal operation of getChar().
+     *
+     * @param n number of chars to return
+     * @return char[] containing upcoming input chars
+     * @throws java.io.IOException                         for problem reading EDI data
+     * @throws com.berryworks.edireader.EDISyntaxException
+     */
+    public char[] lookahead(int n) throws IOException, EDISyntaxException {
         if (EDIReader.debug)
-          trace("buffering more data to satisfy lookahead(" + n + ")");
-      charBuffer.compact();
-      readUntilBufferProvidesAtLeast(n - 1);
+            trace("EDITokenizer.lookahead(" + n + ")");
+
+        char[] rval = new char[n];
+
+        // The 1st char is grabbed using the tokenizer's built-in
+        // getChar() / ungetChar() mechanism. This allows things to work
+        // properly whether or not the next char has already been gotten.
+        getChar();
+        rval[0] = cChar;
+        ungetChar();
+
+        // The minus 1 is because we have already filled the first char of the return value, so we only need n-1 more
+        if (charBuffer.remaining() < n - 1) {
+            if (EDIReader.debug)
+                if (EDIReader.debug)
+                    trace("buffering more data to satisfy lookahead(" + n + ")");
+            readUntilBufferProvidesAtLeast(n - 1);
+        }
+
+        // Move chars from the buffer into the return value
+        int j = 1;
+        for (int i = charBuffer.position(); i < charBuffer.limit() && j < n; i++)
+            rval[j++] = charBuffer.get(i);
+
+        // If more lookahead chars were requested than were satisfied for any reason,
+        // then fill the return value with '?' to the requested length.
+        for (; j < n; ) {
+            rval[j++] = '?';
+        }
+
+        return rval;
     }
 
-    // Move chars from the buffer into the return value,
-    // up to the length of the buffer
-    int j = 1;
-    for (int i = charBuffer.position(); i < charBuffer.position() + n - 1; i++)
-      rval[j++] = charBuffer.get(i);
-
-    // If more lookahead chars were requested than were satisfied for any reason,
-    // then fill the return value with '?' to the requested length.
-    for (; j < n;) rval[j++] = '?';
-
-    return rval;
-  }
-
-  private void readUntilBufferProvidesAtLeast(int needed) throws IOException
-  {
-
-    while (charBuffer.remaining() < needed)
-    {
-      charBuffer.compact();
-      int n;
-      while ((n = inputReader.read(charBuffer)) == 0)
-      {
-        if (EDIReader.debug) trace("read returned zero in readUntil...");
-      }
-      if (EDIReader.debug) trace("readUntil... got " + n + " chars of input into buffer");
-      if (n < 0)
-      {
-        if (EDIReader.debug) trace("hit end of file in readUntil...");
-        endOfFile = true;
-        break;
-      }
-      charBuffer.flip();
+    private void readUntilBufferProvidesAtLeast(int needed) throws IOException {
+
+        while (charBuffer.remaining() < needed) {
+            charBuffer.compact();
+            int n;
+            while ((n = inputReader.read(charBuffer)) == 0) {
+            }
+
+            charBuffer.flip();
+            if (n < 0) {
+                endOfFile = true;
+                break;
+            }
+        }
     }
-  }
 
 
 }