Skip to content

Commit 44a3299

Browse files
committed
Use Vocabulary interface instead of token names
1 parent 82bd8c3 commit 44a3299

8 files changed

+79
-120
lines changed

org-antlr-works-editor/src/org/antlr/works/editor/grammar/completion/KeywordCompletionItem.java

+12-17
Original file line number | Diff line number | Diff line change
@@ -40,24 +40,19 @@ public class KeywordCompletionItem extends GrammarCompletionItem {
4040
GrammarLexer.CHANNELS,
4141
GrammarLexer.OPTIONS);
4242

43-
private static final Map<Integer, String> KEYWORDS =
44-
new HashMap<Integer, String>() {{
45-
for (int i : KEYWORD_TYPES.toArray()) {
46-
String keyword = GrammarLexer.tokenNames[i].toLowerCase();
47-
if (keyword.charAt(0) == '\'') {
48-
keyword = keyword.substring(1, keyword.length() - 1);
49-
}
50-
51-
put(i, keyword);
52-
}
53-
}};
43+
private static final Map<Integer, String> KEYWORDS = new HashMap<>();
44+
static {
45+
for (int i : KEYWORD_TYPES.toArray()) {
46+
KEYWORDS.put(i, GrammarLexer.VOCABULARY.getSymbolicName(i).toLowerCase());
47+
}
48+
}
5449

55-
public static final Map<Integer, KeywordCompletionItem> KEYWORD_ITEMS =
56-
new HashMap<Integer, KeywordCompletionItem>() {{
57-
for (Map.Entry<Integer, String> keyword : KEYWORDS.entrySet()) {
58-
put(keyword.getKey(), new KeywordCompletionItem(keyword.getValue()));
59-
}
60-
}};
50+
public static final Map<Integer, KeywordCompletionItem> KEYWORD_ITEMS = new HashMap<>();
51+
static {
52+
for (Map.Entry<Integer, String> keyword : KEYWORDS.entrySet()) {
53+
KEYWORD_ITEMS.put(keyword.getKey(), new KeywordCompletionItem(keyword.getValue()));
54+
}
55+
}
6156

6257
private final String keyword;
6358
private String leftText;

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/AbstractGrammarDebuggerEditorKit.java

+14-9
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import javax.swing.text.Document;
2424
import javax.xml.bind.DatatypeConverter;
2525
import org.antlr.v4.runtime.CommonTokenStream;
26+
import org.antlr.v4.runtime.VocabularyImpl;
2627
import org.antlr.v4.runtime.atn.ATN;
2728
import org.antlr.v4.runtime.atn.ATNDeserializer;
2829
import org.antlr.works.editor.grammar.debugger.LexerDebuggerTokenHighlighterLayerFactory.LexerOpCode;
@@ -50,10 +51,9 @@ public abstract class AbstractGrammarDebuggerEditorKit extends NbEditorKit {
5051
public static final String PROP_CHANNELS = "Channels";
5152

5253
/**
53-
* The names of tokens in the associated grammar, stored as an array of
54-
* strings {@link String}{@code []}.
54+
* The token vocabulary, stored as a {@link Vocabulary} instance.
5555
*/
56-
public static final String PROP_TOKEN_NAMES = "Token Names";
56+
public static final String PROP_VOCABULARY = "Vocabulary";
5757
/**
5858
* The names of rules in the associated grammar, stored as an array of
5959
* strings {@link String}{@code []}.
@@ -80,19 +80,24 @@ public void read(Reader in, Document doc, int pos) throws IOException, BadLocati
8080
super.read(new InputStreamReader(inputStream, UTF_8), doc, pos);
8181

8282
// read the vocabulary (literal and symbolic token names)
83-
int tokenNamesOffset = 4 + inputSize;
84-
int tokenNamesSize = readInteger(binary, tokenNamesOffset);
85-
String[] tokenNames = readStrings(binary, tokenNamesOffset + 4, tokenNamesSize);
86-
doc.putProperty(PROP_TOKEN_NAMES, tokenNames);
83+
int literalNamesOffset = 4 + inputSize;
84+
int literalNamesSize = readInteger(binary, literalNamesOffset);
85+
String[] literalNames = readStrings(binary, literalNamesOffset + 4, literalNamesSize);
86+
87+
int symbolicNamesOffset = literalNamesOffset + 4 + literalNamesSize;
88+
int symbolicNamesSize = readInteger(binary, symbolicNamesOffset);
89+
String[] symbolicNames = readStrings(binary, symbolicNamesOffset + 4, symbolicNamesSize);
90+
91+
doc.putProperty(PROP_VOCABULARY, new VocabularyImpl(literalNames, symbolicNames));
8792

8893
// read the rule names
89-
int ruleNamesOffset = 4 + inputSize;
94+
int ruleNamesOffset = symbolicNamesOffset + 4 + symbolicNamesSize;
9095
int ruleNamesSize = readInteger(binary, ruleNamesOffset);
9196
String[] ruleNames = readStrings(binary, ruleNamesOffset + 4, ruleNamesSize);
9297
doc.putProperty(PROP_RULE_NAMES, ruleNames);
9398

9499
// read the mode names
95-
int modeNamesOffset = tokenNamesOffset + 4 + tokenNamesSize;
100+
int modeNamesOffset = ruleNamesOffset + 4 + ruleNamesSize;
96101
int modeNamesSize = readInteger(binary, modeNamesOffset);
97102
String[] modeNames = readStrings(binary, modeNamesOffset + 4, modeNamesSize);
98103
doc.putProperty(PROP_MODE_NAMES, modeNames);

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/AbstractInterpreterData.java

+2-45
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,8 @@
99

1010
package org.antlr.works.editor.grammar.debugger;
1111

12-
import java.util.ArrayList;
1312
import java.util.List;
14-
import java.util.Map;
15-
import org.antlr.v4.misc.Utils;
16-
import org.antlr.v4.tool.Grammar;
17-
import org.antlr.works.editor.grammar.debugger.LexerDebuggerControllerTopComponent.TokenDescriptor;
13+
import org.antlr.v4.runtime.Vocabulary;
1814

1915
/**
2016
*
@@ -24,47 +20,8 @@ public class AbstractInterpreterData {
2420

2521
public String grammarFileName;
2622
public String serializedAtn;
27-
public List<TokenDescriptor> tokenNames;
23+
public Vocabulary vocabulary;
2824
public List<String> ruleNames;
2925
public int startRuleIndex;
3026

31-
public static TokenDescriptor[] getTokenNames(Grammar grammar) {
32-
int numTokens = grammar.getMaxTokenType();
33-
List<String> typeToStringLiteralList = new ArrayList<>(grammar.typeToStringLiteralList);
34-
Utils.setSize(typeToStringLiteralList, numTokens + 1);
35-
for (Map.Entry<String, Integer> entry : grammar.stringLiteralToTypeMap.entrySet()) {
36-
if (entry.getValue() < 0 || entry.getValue() >= typeToStringLiteralList.size()) {
37-
continue;
38-
}
39-
40-
typeToStringLiteralList.set(entry.getValue(), entry.getKey());
41-
}
42-
43-
TokenDescriptor[] tokenNames = new TokenDescriptor[numTokens+1];
44-
for (int i = 0; i < tokenNames.length; i++) {
45-
tokenNames[i] = new TokenDescriptor();
46-
}
47-
48-
for (String tokenName : grammar.tokenNameToTypeMap.keySet()) {
49-
Integer ttype = grammar.tokenNameToTypeMap.get(tokenName);
50-
if (ttype < 0 || ttype >= tokenNames.length) {
51-
continue;
52-
}
53-
54-
if ( tokenName!=null && tokenName.startsWith(Grammar.AUTO_GENERATED_TOKEN_NAME_PREFIX) ) {
55-
if (ttype < typeToStringLiteralList.size()) {
56-
String literal = typeToStringLiteralList.get(ttype);
57-
if (literal != null) {
58-
tokenNames[ttype].literal = literal;
59-
}
60-
}
61-
}
62-
63-
tokenNames[ttype].name = tokenName;
64-
tokenNames[ttype].value = ttype;
65-
}
66-
67-
return tokenNames;
68-
}
69-
7027
}

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/LexerDebuggerControllerTopComponent.java

+37-22
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,9 @@
3131
import javax.swing.text.JTextComponent;
3232
import javax.swing.text.StyledDocument;
3333
import org.antlr.v4.runtime.Lexer;
34+
import org.antlr.v4.runtime.Token;
35+
import org.antlr.v4.runtime.Vocabulary;
36+
import org.antlr.v4.runtime.VocabularyImpl;
3437
import org.netbeans.api.annotations.common.NonNull;
3538
import org.netbeans.api.editor.EditorRegistry;
3639
import org.netbeans.api.settings.ConvertAsProperties;
@@ -601,29 +604,46 @@ public void propertyChange(PropertyChangeEvent evt) {
601604
}
602605

603606
Document document = component.getDocument();
604-
TokenDescriptor[] tokenDescriptorArray = (TokenDescriptor[])document.getProperty(LexerDebuggerEditorKit.PROP_TOKEN_NAMES);
607+
Vocabulary vocabulary = (Vocabulary)document.getProperty(LexerDebuggerEditorKit.PROP_VOCABULARY);
605608
String[] modeNamesArray = (String[])document.getProperty(LexerDebuggerEditorKit.PROP_MODE_NAMES);
606-
List<TokenDescriptor> tokenDescriptors = tokenDescriptorArray != null ? Arrays.asList(tokenDescriptorArray) : Collections.<TokenDescriptor>emptyList();
607609
List<String> modeNames = modeNamesArray != null ? Arrays.asList(modeNamesArray) : Collections.<String>emptyList();
608-
if (tokenDescriptors.isEmpty()) {
610+
if (vocabulary == null) {
609611
LexerInterpreterData lexerInterpreterData = (LexerInterpreterData)document.getProperty(LexerDebuggerEditorKit.PROP_LEXER_INTERP_DATA);
610612
if (lexerInterpreterData != null) {
611-
tokenDescriptors = lexerInterpreterData.tokenNames;
613+
vocabulary = lexerInterpreterData.vocabulary;
612614
modeNames = lexerInterpreterData.modeNames;
613615
}
614616
}
615617

616-
final List<TokenDescriptor> finalTokenDescriptors = tokenDescriptors;
618+
final Vocabulary finalVocabulary = vocabulary != null ? vocabulary : VocabularyImpl.EMPTY_VOCABULARY;
617619
final List<String> finalModeNames = modeNames;
618620

619621
currentComponent = component;
620622

621623
tblTokenTypes.setModel(new AbstractTableModel() {
622-
private final List<TokenDescriptor> elements = finalTokenDescriptors;
624+
private final List<String> literalNames = new ArrayList<>();
625+
private final List<String> symbolicNames = new ArrayList<>();
626+
private final List<Integer> values = new ArrayList<>();
627+
628+
{
629+
// TODO: Find a better way to communicate this value
630+
int maxTokenType = 1024;
631+
for (int i = 0; i <= maxTokenType; i++) {
632+
String literalName = finalVocabulary.getLiteralName(i);
633+
String symbolicName = finalVocabulary.getSymbolicName(i);
634+
if (literalName == null && symbolicName == null) {
635+
continue;
636+
}
637+
638+
literalNames.add(literalName != null ? literalName : "");
639+
symbolicNames.add(symbolicName != null ? symbolicName : literalName);
640+
values.add(i);
641+
}
642+
}
623643

624644
@Override
625645
public int getRowCount() {
626-
return elements.size();
646+
return literalNames.size();
627647
}
628648

629649
@Override
@@ -664,11 +684,11 @@ public Class<?> getColumnClass(int columnIndex) {
664684
public Object getValueAt(int rowIndex, int columnIndex) {
665685
switch (columnIndex) {
666686
case 0:
667-
return elements.get(rowIndex).name;
687+
return symbolicNames.get(rowIndex);
668688
case 1:
669-
return elements.get(rowIndex).literal;
689+
return literalNames.get(rowIndex);
670690
case 2:
671-
return elements.get(rowIndex).value;
691+
return values.get(rowIndex);
672692
default:
673693
throw new IllegalArgumentException();
674694
}
@@ -690,15 +710,16 @@ public TraceToken getElementAt(int index) {
690710

691711
});
692712

693-
String[] tokenNamesArray = new String[tokenDescriptors.size()];
694-
for (int i = 0; i < tokenDescriptors.size(); i++) {
695-
tokenNamesArray[i] = tokenDescriptors.get(i).literal;
696-
if (tokenNamesArray[i] == null || tokenNamesArray[i].isEmpty()) {
697-
tokenNamesArray[i] = tokenDescriptors.get(i).name;
713+
List<String> tokenNames = new ArrayList<>();
714+
for (int i = Token.EOF; i < 1024; i++) {
715+
if (finalVocabulary.getLiteralName(i) == null && finalVocabulary.getSymbolicName(i) == null) {
716+
continue;
698717
}
718+
719+
tokenNames.add(finalVocabulary.getDisplayName(i));
699720
}
700721

701-
lstTokens.setCellRenderer(new TraceTokenListCellRenderer(Arrays.asList(tokenNamesArray)));
722+
lstTokens.setCellRenderer(new TraceTokenListCellRenderer(tokenNames));
702723

703724
lstChannels.setModel(new AbstractListModel<Object>() {
704725
private final Object[] elements = { defaultChannelText, hiddenChannelText };
@@ -824,10 +845,4 @@ public Component getListCellRendererComponent(JList<?> list, Object value, int i
824845
}
825846

826847
}
827-
828-
public static class TokenDescriptor {
829-
public String name = "";
830-
public String literal = "";
831-
public int value;
832-
}
833848
}

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/LexerInterpreterData.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ public static LexerInterpreterData buildFromSnapshot(DocumentSnapshot snapshot)
8686
LexerInterpreterData data = new LexerInterpreterData();
8787
data.grammarFileName = lexerGrammar.fileName;
8888
data.serializedAtn = ATNSerializer.getSerializedAsString(lexerGrammar.atn, Arrays.asList(lexerGrammar.getRuleNames()));
89-
data.tokenNames = new ArrayList<>(Arrays.asList(getTokenNames(lexerGrammar)));
89+
data.vocabulary = lexerGrammar.getVocabulary();
9090
data.ruleNames = new ArrayList<>(lexerGrammar.rules.keySet());
9191
data.modeNames = new ArrayList<>(lexerGrammar.modes.keySet());
9292
return data;

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/ParserDebuggerReferenceAnchorsParserTask.java

+5-9
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
*/
99
package org.antlr.works.editor.grammar.debugger;
1010

11-
import java.util.ArrayList;
1211
import java.util.Arrays;
1312
import java.util.Collection;
1413
import java.util.Collections;
@@ -43,6 +42,7 @@
4342
import org.antlr.v4.runtime.Token;
4443
import org.antlr.v4.runtime.TokenSource;
4544
import org.antlr.v4.runtime.TokenStream;
45+
import org.antlr.v4.runtime.Vocabulary;
4646
import org.antlr.v4.runtime.atn.ATN;
4747
import org.antlr.v4.runtime.atn.ATNDeserializer;
4848
import org.antlr.v4.runtime.atn.ATNState;
@@ -53,7 +53,6 @@
5353
import org.antlr.works.editor.antlr4.classification.TaggerTokenSource;
5454
import org.antlr.works.editor.antlr4.parsing.DescriptiveErrorListener;
5555
import org.antlr.works.editor.antlr4.parsing.SyntaxErrorListener;
56-
import org.antlr.works.editor.grammar.debugger.LexerDebuggerControllerTopComponent.TokenDescriptor;
5756
import org.netbeans.api.editor.mimelookup.MimeRegistration;
5857

5958
/**
@@ -92,13 +91,10 @@ public void parse(ParserTaskManager taskManager, ParseContext context, DocumentS
9291

9392
ParserInterpreterData parserInterpreterData = (ParserInterpreterData)snapshot.getVersionedDocument().getDocument().getProperty(ParserDebuggerEditorKit.PROP_PARSER_INTERP_DATA);
9493
String grammarFileName = parserInterpreterData.grammarFileName;
95-
List<String> tokenNames = new ArrayList<>(parserInterpreterData.tokenNames.size());
96-
for (TokenDescriptor tokenDescriptor : parserInterpreterData.tokenNames) {
97-
tokenNames.add(tokenDescriptor.name);
98-
}
94+
Vocabulary vocabulary = parserInterpreterData.vocabulary;
9995
List<String> ruleNames = parserInterpreterData.ruleNames;
10096
ATN atn = new ATNDeserializer().deserialize(parserInterpreterData.serializedAtn.toCharArray());
101-
TracingParserInterpreter parser = new TracingParserInterpreter(grammarFileName, tokenNames, ruleNames, atn, tokenStream);
97+
TracingParserInterpreter parser = new TracingParserInterpreter(grammarFileName, vocabulary, ruleNames, atn, tokenStream);
10298

10399
long startTime = System.nanoTime();
104100
parser.setInterpreter(new StatisticsParserATNSimulator(parser, atn));
@@ -128,8 +124,8 @@ public void parse(ParserTaskManager taskManager, ParseContext context, DocumentS
128124
public static class TracingParserInterpreter extends ParserInterpreter {
129125
public final Map<ParseTree, Transition> associatedTransitions = new IdentityHashMap<>();
130126

131-
public TracingParserInterpreter(String grammarFileName, Collection<String> tokenNames, Collection<String> ruleNames, ATN atn, TokenStream input) {
132-
super(grammarFileName, tokenNames, ruleNames, atn, input);
127+
public TracingParserInterpreter(String grammarFileName, Vocabulary vocabulary, Collection<String> ruleNames, ATN atn, TokenStream input) {
128+
super(grammarFileName, vocabulary, ruleNames, atn, input);
133129
}
134130

135131
@Override

org-antlr-works-editor/src/org/antlr/works/editor/grammar/debugger/ParserDebuggerTokensTaskTaggerSnapshot.java

+7-16
Original file line number | Diff line number | Diff line change
@@ -8,19 +8,18 @@
88
*/
99
package org.antlr.works.editor.grammar.debugger;
1010

11-
import java.util.ArrayList;
1211
import java.util.Collection;
1312
import java.util.List;
1413
import org.antlr.netbeans.editor.text.DocumentSnapshot;
1514
import org.antlr.v4.runtime.CharStream;
1615
import org.antlr.v4.runtime.LexerInterpreter;
1716
import org.antlr.v4.runtime.TokenSource;
17+
import org.antlr.v4.runtime.Vocabulary;
1818
import org.antlr.v4.runtime.atn.ATN;
1919
import org.antlr.v4.runtime.atn.ATNDeserializer;
2020
import org.antlr.works.editor.antlr4.classification.AbstractTokensTaskTaggerSnapshot;
2121
import org.antlr.works.editor.antlr4.classification.SimpleLexerState;
2222
import org.antlr.works.editor.antlr4.highlighting.TokenSourceWithStateV4;
23-
import org.antlr.works.editor.grammar.debugger.LexerDebuggerControllerTopComponent.TokenDescriptor;
2423
import org.netbeans.api.annotations.common.NonNull;
2524

2625
/**
@@ -49,31 +48,23 @@ protected SimpleLexerState getStartState() {
4948
@Override
5049
protected TokenSourceWithStateV4<SimpleLexerState> createLexer(CharStream input, SimpleLexerState startState) {
5150
ATN atn = new ATNDeserializer().deserialize(lexerInterpreterData.serializedAtn.toCharArray());
52-
List<String> tokenNames = new ArrayList<>();
53-
for (TokenDescriptor tokenDescriptor : lexerInterpreterData.tokenNames) {
54-
tokenNames.add(tokenDescriptor.name);
55-
}
56-
51+
Vocabulary vocabulary = lexerInterpreterData.vocabulary;
5752
String grammarFileName = lexerInterpreterData.grammarFileName;
5853
List<String> ruleNames = lexerInterpreterData.ruleNames;
5954
List<String> modeNames = lexerInterpreterData.modeNames;
60-
ParserDebuggerLexerWrapper lexer = new ParserDebuggerLexerWrapper(grammarFileName, tokenNames, ruleNames, modeNames, atn, input);
55+
ParserDebuggerLexerWrapper lexer = new ParserDebuggerLexerWrapper(grammarFileName, vocabulary, ruleNames, modeNames, atn, input);
6156
startState.apply(lexer);
6257
return lexer;
6358
}
6459

6560
@Override
6661
protected TokenSource getEffectiveTokenSource(TokenSourceWithStateV4<SimpleLexerState> lexer) {
6762
ATN atn = new ATNDeserializer().deserialize(lexerInterpreterData.serializedAtn.toCharArray());
68-
List<String> tokenNames = new ArrayList<>();
69-
for (TokenDescriptor tokenDescriptor : lexerInterpreterData.tokenNames) {
70-
tokenNames.add(tokenDescriptor.name);
71-
}
72-
63+
Vocabulary vocabulary = lexerInterpreterData.vocabulary;
7364
String grammarFileName = lexerInterpreterData.grammarFileName;
7465
List<String> ruleNames = lexerInterpreterData.ruleNames;
7566
List<String> modeNames = lexerInterpreterData.modeNames;
76-
return new ParserDebuggerLexerWrapper(grammarFileName, tokenNames, ruleNames, modeNames, atn, lexer.getInputStream());
67+
return new ParserDebuggerLexerWrapper(grammarFileName, vocabulary, ruleNames, modeNames, atn, lexer.getInputStream());
7768
}
7869

7970
@Override
@@ -83,8 +74,8 @@ protected ParserDebuggerTokensTaskTaggerSnapshot translateToImpl(@NonNull Docume
8374

8475
private static class ParserDebuggerLexerWrapper extends LexerInterpreter implements TokenSourceWithStateV4<SimpleLexerState> {
8576

86-
public ParserDebuggerLexerWrapper(String grammarFileName, Collection<String> tokenNames, Collection<String> ruleNames, Collection<String> modeNames, ATN atn, CharStream input) {
87-
super(grammarFileName, tokenNames, ruleNames, modeNames, atn, input);
77+
public ParserDebuggerLexerWrapper(String grammarFileName, Vocabulary vocabulary, Collection<String> ruleNames, Collection<String> modeNames, ATN atn, CharStream input) {
78+
super(grammarFileName, vocabulary, ruleNames, modeNames, atn, input);
8879
}
8980

9081
@Override

0 commit comments

Comments (0)