Skip to content

feat: open source the JavaScript extractor code #55

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions language/javascript/extractor/.eslintignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
node_modules/
test/
dist/
out/
10 changes: 10 additions & 0 deletions language/javascript/extractor/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// ESLint configuration for the extractor package.
// The directive below suppresses `no-undef` for the CommonJS `module` global
// used on the following line.
// eslint-disable-next-line no-undef
module.exports = {
// Treat this file as the root config so ESLint does not search parent directories.
root: true,
// Parse sources with the TypeScript-aware parser.
parser: '@typescript-eslint/parser',
plugins: ['@typescript-eslint'],
// Start from the recommended rule sets of ESLint and typescript-eslint.
extends: ['eslint:recommended', 'plugin:@typescript-eslint/recommended'],
rules: {
// Empty interface declarations are allowed in this code base.
'@typescript-eslint/no-empty-interface': 'off',
},
};
14 changes: 14 additions & 0 deletions language/javascript/extractor/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
.DS_Store

.vscode/
.idea/

node_modules/
coverage/
dist/
out/

bazel-*

*.db
*.db-journal
4 changes: 4 additions & 0 deletions language/javascript/extractor/.prettierignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
node_modules/
test/
dist/
out/
15 changes: 15 additions & 0 deletions language/javascript/extractor/.prettierrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"arrowParens": "avoid",
"bracketSpacing": true,
"endOfLine": "lf",
"bracketSameLine": false,
"jsxSingleQuote": false,
"printWidth": 80,
"proseWrap": "preserve",
"quoteProps": "as-needed",
"semi": true,
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "all",
"useTabs": false
}
9 changes: 9 additions & 0 deletions language/javascript/extractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Introduction
`coref-js-src-extractor` is a data-generation tool that extracts JavaScript/TypeScript source code into COREF-format data.

# Quick Start
1. Environment requirement: Node.js >= 18.
2. Install yarn with `npm install --global yarn`, then install the dependencies by running `yarn`.
3. Build and package with `npm run pkg`. A `coref-javascript-src-extractor` executable file will be generated in the current directory.
4. Run the extractor: `./coref-javascript-src-extractor extract -s {source_root}`.
5. For more help, run `./coref-javascript-src-extractor -h` or `./coref-javascript-src-extractor extract -h`.
9 changes: 9 additions & 0 deletions language/javascript/extractor/README_cn.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# 介绍
coref-js-src-extractor 是一个数据生成工具,该工具将 JavaScript / TypeScript 源码抽取为COREF格式的数据。

# 快速开始
1. 环境要求 Node>=18。
2. 安装 yarn。 `npm install --global yarn` 并安装依赖 `yarn`。
3. 构建 && 打包。 `npm run pkg`。当前目录下会生成 `coref-javascript-src-extractor` 可执行文件。
4. 运行抽取器。`./coref-javascript-src-extractor extract -s {source_root}`。
5. 查看更多帮助。`./coref-javascript-src-extractor -h` 或 `./coref-javascript-src-extractor extract -h`。
221 changes: 221 additions & 0 deletions language/javascript/extractor/coref-javascript-er.puml
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
@startuml
' https://plantuml.com/ie-diagram

' Location table
' Columns: file_oid, start/end line and column numbers, and a text column.
' NOTE(review): the diagram does not say whether line/column numbers are
' 0-based or 1-based — confirm against the extractor.
entity location {
oid: INTEGER <<PK>>
file_oid: INTEGER
start_line_number: INTEGER
start_column_number: INTEGER
end_line_number: INTEGER
end_column_number: INTEGER
text: TEXT
}


' Number of lines table
entity number_of_lines {
location_oid: INTEGER <<PK>>
lines: INTEGER
code_lines: INTEGER
comment_lines: INTEGER
}


entity file {
oid: INTEGER <<PK>>
name: TEXT
extension: TEXT
relative_path: TEXT
location_oid: INTEGER
}

entity directory {
oid: INTEGER <<PK>>
name: TEXT
relative_path: TEXT
location_oid: INTEGER
}

' Directory hierarchy table
' parent_oid: ref directory
' child_oid: ref directory | file
' child_oid is the primary key, so a given child appears in at most one row
' (i.e. it is recorded under a single parent).
entity directory_hierarchy {
parent_oid: INTEGER
child_oid: INTEGER <<PK>>
}

' kind:
' enum {
' script = 0
' inline_script = 1
' event_handler = 2
' javascript_url = 3
' template_top_level = 4
' }
entity top_level {
oid: INTEGER <<PK>>
kind: INTEGER
location_oid: INTEGER
}

' Node table
' kind: INTEGER code; its enumeration is not listed in this diagram.
' NOTE(review): parent_oid / index presumably hold the parent node's oid and
' this node's position among that parent's children — confirm.
entity node {
oid: INTEGER <<PK>>
kind: INTEGER
parent_oid: INTEGER
index: INTEGER
location_oid: INTEGER
}

entity literal {
oid: INTEGER <<PK>>
value: TEXT
}

entity binding_element_property_name {
oid: INTEGER <<PK>>
property_name_oid: INTEGER
}

entity binding_element_name {
oid: INTEGER <<PK>>
name_oid: INTEGER
}

entity binding_element_initializer {
oid: INTEGER <<PK>>
initializer_oid: INTEGER
}

'
' Class declaration or expression
'
entity class_like_declaration {
oid: INTEGER <<PK>>
kind: INTEGER
name: TEXT
}

entity function_like_declaration {
oid: INTEGER <<PK>>
kind: INTEGER
name: TEXT
}

entity function_enclosing_node {
node_oid: INTEGER <<PK>>
function_oid: INTEGER
}

entity modifier {
oid: INTEGER <<PK>>
index: INTEGER
}

' use `symbol_` instead of `symbol` to avoid the typescript error TS2457: Type alias name cannot be 'symbol'.
entity symbol_ {
oid: INTEGER <<PK>>
name: TEXT
description: TEXT
}

entity node_symbol {
node_oid: INTEGER <<PK>>
symbol_oid: INTEGER
}

entity shorthand_assignment_value_symbol {
node_oid: INTEGER <<PK>>
symbol_oid: INTEGER
}

entity call_site {
invoke_expression_oid: INTEGER <<PK>>
callee_oid: INTEGER
}

entity cfg_entry_node {
oid: INTEGER <<PK>>
ast_node_oid: INTEGER
}

entity cfg_exit_node {
oid: INTEGER <<PK>>
ast_node_oid: INTEGER
}


' ----
' Type
' ----

' ' Type table
' '
' entity type {
' oid: INTEGER <<PK>>
' kind: INTEGER
' name: TEXT
' }

' ' Type hierarchy table
' '
' ' child_oid: ref type
' ' parent_oid: ref type
' ' index: the index of child type in the parent type
' entity type_hierarchy {
' oid: INTEGER <<PK>>
' parent_oid: INTEGER
' child_oid: INTEGER
' index: INTEGER
' }

' ' Type alias table
' '
' ' alias_type_oid: ref type
' ' underlying_type_oid: ref type
' entity type_alias {
' alias_type_oid: INTEGER <<PK>>
' underlying_type_oid: INTEGER
' }

' Comment table
entity comment {
oid: INTEGER <<PK>>
kind: INTEGER
location_oid: INTEGER
}

' Node comment table
' Each row pairs a node_oid with a comment_oid.
' NOTE(review): the meaning of the `type` codes is not documented in this
' diagram — list the enum values here once confirmed.
entity node_comment {
oid: INTEGER <<PK>>
node_oid: INTEGER
comment_oid: INTEGER
type: INTEGER
}

' entity token {
' oid: INTEGER <<PK>>
' kind: INTEGER
' location_oid: INTEGER
' }

' JS parse error table
' NOTE(review): `line` is declared TEXT rather than INTEGER; it is unclear from
' this diagram whether it holds the offending source line's text or a line
' number — confirm.
entity js_parse_error {
oid: INTEGER <<PK>>
message: TEXT
line: TEXT
}

' Metadata table
' NOTE(review): `version` presumably identifies the extractor or schema version
' and `created_time` when the data was generated — confirm.
entity metadata {
oid: INTEGER <<PK>>
version: TEXT
created_time: DATETIME
}

' Ignored path table
' NOTE(review): path_kind and ignore_kind are INTEGER codes whose enumerations
' are not listed in this diagram — document them the way top_level.kind is.
entity ignored_path {
oid: INTEGER <<PK>>
path_kind: INTEGER
path: TEXT
ignore_kind: INTEGER
}

@enduml
Loading