Skip to content

Commit d6abca7

Browse files
committed
Initial commit
0 parents  commit d6abca7

21 files changed

+21369
-0
lines changed

.editorconfig

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
root = true
2+
3+
[*]
4+
indent_style = space
5+
indent_size = 2

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
node_modules
2+
dist

.travis.yml

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
language: node_js
2+
sudo: false
3+
node_js:
4+
- "lts/*"

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2017 Uri Shaked and contributors
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in
13+
all copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
THE SOFTWARE.

README.md

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# java-ast
2+
3+
Java Parser for JavaScript/TypeScript, based on [antlr4ts](https://www.npmjs.com/package/antlr4ts)
4+
5+
[![Build Status](https://travis-ci.org/urish/java-ast.png?branch=master)](https://travis-ci.org/urish/java-ast)
6+
[![code style: prettier](https://img.shields.io/badge/code_style-prettier-ff69b4.svg?style=flat-square)](https://github.com/prettier/prettier)
7+
8+
## Usage Example
9+
10+
```typescript
11+
import { parse } from 'java-ast';
12+
13+
const ast = parse(`package test;\n\nclass TestClass {}\n`);
14+
// do something with ast, e.g. console.log(ast.toStringTree());
15+
```

package.json

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
{
2+
"name": "java-ast",
3+
"version": "0.0.1",
4+
"description": "Java Parser for JavaScript/TypeScript, based on antlr4ts",
5+
"main": "dist/index.js",
6+
"typings": "dist/index.d.ts",
7+
"license": "MIT",
8+
"repository": "https://github.com/urish/java-ast",
9+
"author": "Uri Shaked <[email protected]>",
10+
"files": [
11+
"dist"
12+
],
13+
"scripts": {
14+
"build": "rimraf dist && tsc",
15+
"format": "prettier --write src/**.ts **/*.json",
16+
"prepublish": "yarn build",
17+
"generate:parser": "antlr4ts -visitor -o src/parser src/parser/JavaLexer.g4 src/parser/JavaParser.g4",
18+
"precommit": "lint-staged",
19+
"postcommit": "git update-index --again",
20+
"test": "jest"
21+
},
22+
"devDependencies": {
23+
"@types/jest": "^23.1.5",
24+
"antlr4ts-cli": "^0.4.0-alpha.4",
25+
"husky": "^0.14.3",
26+
"jest": "^23.3.0",
27+
"lint-staged": "^7.2.0",
28+
"prettier": "^1.13.7",
29+
"rimraf": "^2.6.2",
30+
"ts-jest": "^23.0.0",
31+
"tslint": "^5.10.0",
32+
"typescript": "^2.9.2"
33+
},
34+
"dependencies": {
35+
"antlr4ts": "^0.4.1-alpha.0"
36+
},
37+
"lint-staged": {
38+
"*.{js,json}": [
39+
"prettier --write",
40+
"git add"
41+
],
42+
"*.ts": [
43+
"prettier --write",
44+
"tslint --fix",
45+
"git add"
46+
]
47+
},
48+
"jest": {
49+
"transform": {
50+
"^.+\\.tsx?$": "ts-jest"
51+
},
52+
"testRegex": "(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$",
53+
"moduleFileExtensions": [
54+
"ts",
55+
"tsx",
56+
"js",
57+
"jsx",
58+
"json",
59+
"node"
60+
]
61+
}
62+
}

prettier.config.js

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Prettier formatting options for this repository.
module.exports = {
2+
arrowParens: 'always', // always parenthesize arrow-function parameters, even a single one
3+
printWidth: 100, // wrap lines at 100 columns
4+
singleQuote: true, // prefer 'single' over "double" quotes
5+
tabWidth: 2, // two spaces per indent level (same value as indent_size in .editorconfig)
6+
trailingComma: 'all', // emit trailing commas wherever the syntax permits
7+
};

src/index.spec.ts

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import { parse } from './index';
2+
3+
// Smoke test for the public `parse` entry point: feed it a minimal class
// declaration and check that the class name can be read back from the tree.
describe('Java AST parser', () => {
  it('should parse the given Java code and return the AST', () => {
    const javaSource = `
class TestClass {
}
`;
    const root = parse(javaSource);

    // Walk root -> first child -> its first child -> that node's second child,
    // which is where the identifier of the declared class is expected.
    const firstChild = root.children[0];
    const nameNode = firstChild.getChild(0).getChild(1);

    expect(nameNode.text).toEqual('TestClass');
  });
});

src/index.ts

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { ANTLRInputStream, CommonTokenStream } from 'antlr4ts';
2+
import { JavaLexer } from './parser/JavaLexer';
3+
import { JavaParser } from './parser/JavaParser';
4+
5+
/**
 * Runs the Java lexer and parser over `source` and hands back the tree
 * produced by the grammar's `compilationUnit` entry rule.
 *
 * @param source Java source text to parse
 * @returns the result of `JavaParser.compilationUnit()` for the given input
 */
export function parse(source: string) {
  // characters -> lexer -> buffered token stream -> parser
  const tokenStream = new CommonTokenStream(new JavaLexer(new ANTLRInputStream(source)));
  return new JavaParser(tokenStream).compilationUnit();
}

src/parser/JavaLexer.g4

+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
/*
2+
[The "BSD licence"]
3+
Copyright (c) 2013 Terence Parr, Sam Harwell
4+
Copyright (c) 2017 Ivan Kochurkin (upgrade to Java 8)
5+
All rights reserved.
6+
7+
Redistribution and use in source and binary forms, with or without
8+
modification, are permitted provided that the following conditions
9+
are met:
10+
1. Redistributions of source code must retain the above copyright
11+
notice, this list of conditions and the following disclaimer.
12+
2. Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions and the following disclaimer in the
14+
documentation and/or other materials provided with the distribution.
15+
3. The name of the author may not be used to endorse or promote products
16+
derived from this software without specific prior written permission.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21+
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22+
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23+
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27+
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
*/
29+
30+
// Lexer grammar for Java source text: it declares token rules only.
lexer grammar JavaLexer;
31+
32+
// Keywords
// Each keyword is a fixed string literal. All keyword rules are declared ahead
// of IDENTIFIER (further down in this grammar): when two rules match the same
// number of characters ANTLR picks the rule declared first, so `class` lexes
// as CLASS, while a longer word such as `classy` still lexes as IDENTIFIER
// because the longest match takes priority over declaration order.
33+
34+
ABSTRACT: 'abstract';
35+
ASSERT: 'assert';
36+
BOOLEAN: 'boolean';
37+
BREAK: 'break';
38+
BYTE: 'byte';
39+
CASE: 'case';
40+
CATCH: 'catch';
41+
CHAR: 'char';
42+
CLASS: 'class';
43+
CONST: 'const';
44+
CONTINUE: 'continue';
45+
DEFAULT: 'default';
46+
DO: 'do';
47+
DOUBLE: 'double';
48+
ELSE: 'else';
49+
ENUM: 'enum';
50+
EXTENDS: 'extends';
51+
FINAL: 'final';
52+
FINALLY: 'finally';
53+
FLOAT: 'float';
54+
FOR: 'for';
55+
IF: 'if';
56+
GOTO: 'goto';
57+
IMPLEMENTS: 'implements';
58+
IMPORT: 'import';
59+
INSTANCEOF: 'instanceof';
60+
INT: 'int';
61+
INTERFACE: 'interface';
62+
LONG: 'long';
63+
NATIVE: 'native';
64+
NEW: 'new';
65+
PACKAGE: 'package';
66+
PRIVATE: 'private';
67+
PROTECTED: 'protected';
68+
PUBLIC: 'public';
69+
RETURN: 'return';
70+
SHORT: 'short';
71+
STATIC: 'static';
72+
STRICTFP: 'strictfp';
73+
SUPER: 'super';
74+
SWITCH: 'switch';
75+
SYNCHRONIZED: 'synchronized';
76+
THIS: 'this';
77+
THROW: 'throw';
78+
THROWS: 'throws';
79+
TRANSIENT: 'transient';
80+
TRY: 'try';
81+
VOID: 'void';
82+
VOLATILE: 'volatile';
83+
WHILE: 'while';
84+
85+
// Literals
// In the numeric rules below, underscores may appear between digits but a
// literal never ends in an underscore (each pattern closes on a digit class).
86+
87+
// Decimal integer: a lone `0`, or a non-zero digit followed by further digits
// (optionally separated from the first digit by underscores); optional l/L suffix.
DECIMAL_LITERAL: ('0' | [1-9] (Digits? | '_'+ Digits)) [lL]?;
88+
// Hexadecimal integer: `0x`/`0X` prefix, at least one hex digit, optional l/L suffix.
HEX_LITERAL: '0' [xX] [0-9a-fA-F] ([0-9a-fA-F_]* [0-9a-fA-F])? [lL]?;
89+
// Octal integer: leading `0`, optional underscores, then at least one digit 0-7.
OCT_LITERAL: '0' '_'* [0-7] ([0-7_]* [0-7])? [lL]?;
90+
// Binary integer: `0b`/`0B` prefix, at least one bit, optional l/L suffix.
BINARY_LITERAL: '0' [bB] [01] ([01_]* [01])? [lL]?;
91+
92+
// Decimal floating point. First alternative: contains a decimal point, with
// exponent and f/F/d/D suffix both optional. Second alternative: no decimal
// point, so an exponent or a suffix is mandatory -- a bare digit sequence is
// therefore left to the integer rules above.
FLOAT_LITERAL: (Digits '.' Digits? | '.' Digits) ExponentPart? [fFdD]?
93+
| Digits (ExponentPart [fFdD]? | [fFdD])
94+
;
95+
96+
// Hexadecimal floating point: the binary exponent (`p`/`P` plus decimal digits)
// is required; the f/F/d/D suffix is optional.
HEX_FLOAT_LITERAL: '0' [xX] (HexDigits '.'? | HexDigits? '.' HexDigits) [pP] [+-]? Digits [fFdD]?;
97+
98+
// `true` / `false`; declared ahead of IDENTIFIER so these words win the tie.
BOOL_LITERAL: 'true'
99+
| 'false'
100+
;
101+
102+
// Exactly one character or one escape sequence between single quotes; a raw
// quote, backslash, CR or LF is not accepted as the character.
CHAR_LITERAL: '\'' (~['\\\r\n] | EscapeSequence) '\'';
103+
104+
// Zero or more characters/escape sequences between double quotes; a raw
// double quote, backslash, CR or LF is not accepted inside the string.
STRING_LITERAL: '"' (~["\\\r\n] | EscapeSequence)* '"';
105+
106+
// `null`; declared ahead of IDENTIFIER so this word wins the tie.
NULL_LITERAL: 'null';
107+
108+
// Separators
// Single-character punctuation tokens.
109+
110+
LPAREN: '(';
111+
RPAREN: ')';
112+
LBRACE: '{';
113+
RBRACE: '}';
114+
LBRACK: '[';
115+
RBRACK: ']';
116+
SEMI: ';';
117+
COMMA: ',';
118+
DOT: '.';
119+
120+
// Operators
// Several operators share a prefix with a shorter one (e.g. `=` / `==`,
// `>` / `>=` / `>>=` / `>>>=`). Their relative order here is not significant:
// the lexer always takes the longest match and only falls back to declaration
// order when the candidate matches have equal length.
// NOTE(review): this grammar defines no plain shift tokens (`<<`, `>>`, `>>>`),
// only their compound-assignment forms; presumably the parser grammar builds
// shifts out of consecutive LT/GT tokens -- confirm against JavaParser.g4.
121+
122+
ASSIGN: '=';
123+
GT: '>';
124+
LT: '<';
125+
BANG: '!';
126+
TILDE: '~';
127+
QUESTION: '?';
128+
COLON: ':';
129+
EQUAL: '==';
130+
LE: '<=';
131+
GE: '>=';
132+
NOTEQUAL: '!=';
133+
AND: '&&';
134+
OR: '||';
135+
INC: '++';
136+
DEC: '--';
137+
ADD: '+';
138+
SUB: '-';
139+
MUL: '*';
140+
DIV: '/';
141+
BITAND: '&';
142+
BITOR: '|';
143+
CARET: '^';
144+
MOD: '%';
145+
146+
ADD_ASSIGN: '+=';
147+
SUB_ASSIGN: '-=';
148+
MUL_ASSIGN: '*=';
149+
DIV_ASSIGN: '/=';
150+
AND_ASSIGN: '&=';
151+
OR_ASSIGN: '|=';
152+
XOR_ASSIGN: '^=';
153+
MOD_ASSIGN: '%=';
154+
LSHIFT_ASSIGN: '<<=';
155+
RSHIFT_ASSIGN: '>>=';
156+
URSHIFT_ASSIGN: '>>>=';
157+
158+
// Java 8 tokens
// `->` and `::`.
159+
160+
ARROW: '->';
161+
COLONCOLON: '::';
162+
163+
// Additional symbols not defined in the lexical specification
// `...` is three characters long, so it is preferred over three DOT tokens.
164+
165+
AT: '@';
166+
ELLIPSIS: '...';
167+
168+
// Whitespace and comments
// All three rules send their tokens to the HIDDEN channel instead of dropping
// them: the text stays in the token stream but is not delivered to the parser
// on the default channel.
169+
170+
// One or more of: space, tab, CR, LF, form feed (\u000C).
WS: [ \t\r\n\u000C]+ -> channel(HIDDEN);
171+
// Block comment; `.*?` is non-greedy, so the token ends at the first `*/`.
COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
172+
// Line comment; runs up to, but does not include, the next CR or LF.
LINE_COMMENT: '//' ~[\r\n]* -> channel(HIDDEN);
173+
174+
// Identifiers
// A Letter followed by any number of letters or digits. Declared after the
// keyword and literal-word rules so that those win equal-length matches.
175+
176+
IDENTIFIER: Letter LetterOrDigit*;
177+
178+
// Fragment rules
// Fragments are reusable sub-patterns referenced by the token rules; they
// never produce tokens themselves.
179+
180+
// Decimal exponent: `e`/`E`, optional sign, then digits.
fragment ExponentPart
181+
: [eE] [+-]? Digits
182+
;
183+
184+
// Backslash escapes accepted inside character and string literals:
//   1. single-character escapes (\b \t \n \f \r \" \' \\)
//   2. octal escapes of one to three digits; a three-digit form must start with 0-3
//   3. Unicode escapes: backslash, one or more `u`, exactly four hex digits
fragment EscapeSequence
185+
: '\\' [btnfr"'\\]
186+
| '\\' ([0-3]? [0-7])? [0-7]
187+
| '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit
188+
;
189+
190+
// One or more hex digits; underscores allowed in between, never first or last.
fragment HexDigits
191+
: HexDigit ((HexDigit | '_')* HexDigit)?
192+
;
193+
194+
// A single hexadecimal digit, either case.
fragment HexDigit
195+
: [0-9a-fA-F]
196+
;
197+
198+
// One or more decimal digits; underscores allowed in between, never first or last.
fragment Digits
199+
: [0-9] ([0-9_]* [0-9])?
200+
;
201+
202+
// Any character allowed after the first character of an identifier.
fragment LetterOrDigit
203+
: Letter
204+
| [0-9]
205+
;
206+
207+
// Any character allowed at the start of an identifier.
fragment Letter
208+
: [a-zA-Z$_] // these are the "java letters" below 0x7F
209+
| ~[\u0000-\u007F\uD800-\uDBFF] // any character above 0x7F except a high (leading) surrogate; note that an unpaired low surrogate is still accepted here
210+
| [\uD800-\uDBFF] [\uDC00-\uDFFF] // a high surrogate followed by a low surrogate, i.e. the UTF-16 encoding of U+10000 to U+10FFFF
211+
;

0 commit comments

Comments
 (0)