Skip to content

Prototype for conversion to CSTParser.EXPR #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
268 changes: 268 additions & 0 deletions prototypes/cst_conversion.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR.

using CSTParser

using JuliaSyntax
using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange,
@K_str, @KSet_cmd,
haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span,
is_infix, is_trivia, untokenize, TzTokens, children

# See CSTParser.tokenkindtoheadmap
function tokenkindtoheadmap(k::TzTokens.Kind)
k === TzTokens.COMMA ? :COMMA :
k === TzTokens.LPAREN ? :LPAREN :
k === TzTokens.RPAREN ? :RPAREN :
k === TzTokens.LSQUARE ? :LSQUARE :
k === TzTokens.RSQUARE ? :RSQUARE :
k === TzTokens.LBRACE ? :LBRACE :
k === TzTokens.RBRACE ? :RBRACE :
k === TzTokens.AT_SIGN ? :ATSIGN :
k === TzTokens.DOT ? :DOT :
k === TzTokens.ABSTRACT ? :ABSTRACT :
k === TzTokens.BAREMODULE ? :BAREMODULE :
k === TzTokens.BEGIN ? :BEGIN :
k === TzTokens.BREAK ? :BREAK :
k === TzTokens.CATCH ? :CATCH :
k === TzTokens.CONST ? :CONST :
k === TzTokens.CONTINUE ? :CONTINUE :
k === TzTokens.DO ? :DO :
k === TzTokens.ELSE ? :ELSE :
k === TzTokens.ELSEIF ? :ELSEIF :
k === TzTokens.END ? :END :
k === TzTokens.EXPORT ? :EXPORT :
k === TzTokens.FINALLY ? :FINALLY :
k === TzTokens.FOR ? :FOR :
k === TzTokens.FUNCTION ? :FUNCTION :
k === TzTokens.GLOBAL ? :GLOBAL :
k === TzTokens.IF ? :IF :
k === TzTokens.IMPORT ? :IMPORT :
k === TzTokens.LET ? :LET :
k === TzTokens.LOCAL ? :LOCAL :
k === TzTokens.MACRO ? :MACRO :
k === TzTokens.MODULE ? :MODULE :
k === TzTokens.MUTABLE ? :MUTABLE :
k === TzTokens.OUTER ? :OUTER :
k === TzTokens.PRIMITIVE ? :PRIMITIVE :
k === TzTokens.QUOTE ? :QUOTE :
k === TzTokens.RETURN ? :RETURN :
k === TzTokens.STRUCT ? :STRUCT :
k === TzTokens.TRY ? :TRY :
k === TzTokens.TYPE ? :TYPE :
k === TzTokens.USING ? :USING :
k === TzTokens.WHILE ? :WHILE :
k === TzTokens.INTEGER ? :INTEGER :
k === TzTokens.BIN_INT ? :BININT :
k === TzTokens.HEX_INT ? :HEXINT :
k === TzTokens.OCT_INT ? :OCTINT :
k === TzTokens.FLOAT ? :FLOAT :
k === TzTokens.STRING ? :STRING :
# k === TzTokens.TRIPLE_STRING ? :TRIPLESTRING :
k === TzTokens.CHAR ? :CHAR :
k === TzTokens.CMD ? :CMD :
# k === TzTokens.TRIPLE_CMD ? :TRIPLECMD :
k === TzTokens.TRUE ? :TRUE :
k === TzTokens.FALSE ? :FALSE :
k === TzTokens.ENDMARKER ? :errortoken :
error("Unknown token $k")
end

# Things which are "trailing trivia" according to CSTParser
#
# "Trailing trivia" is trivia which will be attached to the end of a node.
is_cst_trailing_trivia(x) = kind(x) in KSet`Whitespace NewlineWs Comment ;`

# Convert GreenNode into CSTParser.EXPR
function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)
node_start = position
cs = children(raw_node)
i = 1
args = CSTParser.EXPR[]
trivia = CSTParser.EXPR[]
last_trivia_span = 0
while i <= length(cs)
raw = cs[i]
if haschildren(raw)
c = cst(source, raw, position)
push!(args, c)
last_trivia_span = c.fullspan - c.span
position += span(raw)
else
start_pos = position
token_start = i
inner_span = span(raw)
position += span(raw)
# Here we append any trailing trivia tokens to the node.
while i < length(cs) && is_cst_trailing_trivia(cs[i+1])
position += span(cs[i+1])
i += 1
end
full_span = position - start_pos
last_trivia_span = full_span - inner_span

# Leaf node
k = kind(raw)
val_range = start_pos:(start_pos + inner_span - 1)
val = source[val_range]

if kind(raw) == K"nothing"
# First `nothing` token in file seems to require this. Why I don't know.
inner_span = full_span
end

# See CSTParser.literalmap. Which we can't use directly because we've
# customized Tokenize.jl :-(
cst_head = k === TzTokens.NOTHING ? :NOTHING :
# FIXME: Following probably need special handling
k === TzTokens.MACRO_NAME ? :IDENTIFIER :
k === TzTokens.CMD_MACRO_NAME ? :IDENTIFIER :
k === TzTokens.STRING_MACRO_NAME ? :IDENTIFIER :
k === TzTokens.DQUOTE ? :DQUOTE :
k === TzTokens.BACKTICK ? :BACKTICK :
is_operator(k) ? :OPERATOR :
is_identifier(k) ? :IDENTIFIER :
tokenkindtoheadmap(k)
# FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD, need special handling:
# * STRING doesn't incude delimiters (DQUOTE tokens)
# * CMD doesn't include delimiters (BACKTICK tokens)
# * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE
# * TRIPLE_CMD is a composite of CMD and TRIPLE_BACKTICK
# They don't exist anymore as individual tokens

push!(is_trivia(raw) ? trivia : args,
CSTParser.EXPR(cst_head, nothing, nothing, full_span, inner_span, val,
nothing, nothing))
end
i += 1
end

if is_infix(raw_node)
args[1], args[2] = args[2], args[1]
# TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions
end

full_span = position - node_start
inner_span = full_span - last_trivia_span
k = kind(raw_node)
cst_head = k == K"toplevel" ? :file :
is_operator(k) ? popfirst!(trivia) :
Symbol(lowercase(string(kind(raw_node))))
x = CSTParser.EXPR(cst_head, args,
isempty(trivia) ? nothing : trivia,
full_span, inner_span, nothing, nothing, nothing)
for a in args
a.parent = x
end
for a in trivia
a.parent = x
end
return x
end


# Some steps of conversion to CSTParser.EXPR is most conveniently done on the
# raw ParseStream representation. In particular, CSTParser.EXPR attaches
# some types of trivia to the end of nontrivia or trivia tokens.
#
# This function reassociates trivia with nonterminal nodes to make converting
# to CSTParser.EXPR a *local* operation on green tree nodes.
function parse_for_cst(text)
stream = JuliaSyntax.ParseStream(text)

# Insert initial nothing node if necessary to anchor trailing whitespace.
if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))
JuliaSyntax.bump_invisible(stream, K"nothing")
end
JuliaSyntax.parse(stream, rule=:toplevel)

# Fix up start of stream
ranges = stream.ranges
@assert kind(ranges[end]) == K"toplevel"
ranges[end] = let r = ranges[end]
TaggedRange(r.head, 1, r.last_token)
end

# Rearrange whitespace trivia tokens so that they're always *trailing*
# siblings of non-whitespace trivia tokens.
#
# This is required for later conversion to CSTParser.EXPR
tokens = stream.tokens
for (i,range) in enumerate(ranges)
first_token = range.first_token
while first_token < length(tokens) &&
is_cst_trailing_trivia(tokens[first_token])
first_token += 1
end
last_token = range.last_token
while last_token < length(tokens) &&
is_cst_trailing_trivia(tokens[last_token+1])
last_token += 1
end
ranges[i] = TaggedRange(head(range), first_token, last_token)
end

return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)
end

# CSTParser.EXPR equality; should be in CSTParser...
function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)
# Debugging hacks:
if x.head != y.head
@info "Trivia mismatch" x.head y.head
end
if x.trivia != y.trivia
@info "Trivia mismatch" x.trivia y.trivia
end
if x.fullspan != y.fullspan
@info "Fullspan mismatch" x y x.fullspan y.fullspan
end
if x.span != y.span
@info "Span mismatch" x y x.span y.span
end
if x.val != y.val
@info "Trivia mismatch" x.val y.val
end

return x.head == y.head &&
x.args == y.args &&
x.trivia == y.trivia &&
x.fullspan == y.fullspan &&
x.span == y.span &&
x.val == y.val &&
x.meta == y.meta
end

# Some things which work
#text = " 1 + 2 * 3 "
#text = "[ 1 ; 2 ;]"
#text = "for i=1:10\nx\ny\nend"
#text = "100.00"
text = """
function f(x,y)
s = 0
for i = 1:10
s += x - i^y
end
end
"""

# Some things which don't yet work
#
# Macro names
# text = "@A.asdf x y"
#
# Bracket nodes don't exist yet in JuliaSyntax
# text = "(a + b)"
#
# Strings have separate delimiters. Will need to put them back together.
# text = "\"str\""

source = SourceFile(text)

ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)

y = CSTParser.parse(text, true)
x = cst(source, ex)
x == y