Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
95c98c7
feat: Add name resolution
MarcellPerger1 Nov 24, 2024
872c398
feat: Add support for parameter names
MarcellPerger1 Nov 25, 2024
8dd7364
fix: Make `global` decls actually global
MarcellPerger1 Nov 27, 2024
b087781
feat: Add is_param attribute to `NameInfo`
MarcellPerger1 Nov 27, 2024
5ad2020
feat: Greatly reduce number of allowed types
MarcellPerger1 Nov 27, 2024
4d4f926
fix: Fix global declarations (again)
MarcellPerger1 Nov 27, 2024
72bc114
feat: Add is_bool to VarInfo
MarcellPerger1 Nov 27, 2024
6158e2f
feat: Tidy up NameInfo stuff a bit (separate BoolInfo, ValInfo)
MarcellPerger1 Nov 28, 2024
ced8702
refactor: Don't manually `sys.intern` PARAM_TYPES
MarcellPerger1 Nov 28, 2024
519f797
refactor: Use new TypeInfo architecture
MarcellPerger1 Nov 28, 2024
afa6fc1
refactor: Remove unused import
MarcellPerger1 Nov 28, 2024
0fc25bb
fix: Actually attach arg types to functions
MarcellPerger1 Nov 29, 2024
2106670
feat: Add ParamInfo to FuncInfo (names (+types) of all params)
MarcellPerger1 Nov 29, 2024
c6ce88f
fix: Remove outdated assertion error
MarcellPerger1 Nov 29, 2024
77c730b
feat: Add custom pformat.py module as a better version of pprint
MarcellPerger1 Nov 30, 2024
bfede03
fix: Fix various bugs in pformat.py
MarcellPerger1 Nov 30, 2024
4bb71d2
refactor: Add `__all__` to pformat.py
MarcellPerger1 Nov 30, 2024
555beaf
fix: Very hacky fix for Textmate bundle syntax highlighting
MarcellPerger1 Nov 30, 2024
f0b941c
refactor: Add default stream as stdout in pformat.py
MarcellPerger1 Nov 30, 2024
45a102f
test: Add E2E test for NameResolver
MarcellPerger1 Nov 30, 2024
0bc4c7a
fix(test): Add the actual file
MarcellPerger1 Nov 30, 2024
a46b141
test(name-resolve): Add some unittests for NameResolver
MarcellPerger1 Nov 30, 2024
36bce5c
test: More tests for errors in NameResolver
MarcellPerger1 Nov 30, 2024
d202962
test: Add tests for pforma
MarcellPerger1 Dec 8, 2024
21a8b26
test: More tests for pformat
MarcellPerger1 Dec 8, 2024
6c87555
fix(pformat): Fix pformat
MarcellPerger1 Dec 8, 2024
49cf266
test: Basic tests for set
MarcellPerger1 Dec 8, 2024
271fadb
fix: Fix pformat.py
MarcellPerger1 Dec 8, 2024
3e778f2
test: More tests for set/frozenset
MarcellPerger1 Dec 8, 2024
c1ac48d
fix(pformat): Fix pformat.py bug (another one)
MarcellPerger1 Dec 8, 2024
e4d163c
test: Add tests for long simple lines
MarcellPerger1 Dec 8, 2024
9695754
refactor: Remove indent_start attribute
MarcellPerger1 Dec 8, 2024
ff3c488
refactor: Rename param, fix outdated stuff
MarcellPerger1 Dec 8, 2024
ad2705e
refactor: Use some keyword args
MarcellPerger1 Dec 8, 2024
784a729
refactor: Add a docstring
MarcellPerger1 Dec 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions main_example_2.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Boolean constants, declared as ordinary numeric globals.
global false = 0;
global true = 1; // temporary
// Empty-bodied stubs tagged `intrinsic` in a comment.
// NOTE(review): presumably the real behaviour is supplied by the compiler/runtime - confirm.
/*intrinsic*/def len(/*list*/val a) {}
/*intrinsic*/def delitem(/*list*/val a, number idx) {}

// Backing list for heap words; a word's address is its index in this list.
global[] psHeap = [];
// Addresses released by PsHeap_FreeWord that PsHeap_AllocWord may hand out again.
global[] _psHeap_freelist = [];

// Finalization hook called by PsHeap_DeleteWord before the word is freed.
// Currently a stub: the body contains no statements.
def _PsHeap_FinalizeObj(number ptr) {
// Dispatch finalizer here, decref components, etc.
}

// Out-variable: PsHeap_AllocWord stores the allocated address here instead of returning it.
global __result__; // TODO: some syntax sugar for return?
// Allocate one heap word and leave its address in `__result__`.
// Reuses an address from the freelist when one is available, otherwise grows psHeap.
// When COUNT_REFS is set, the word's refcount is initialised to 1.
def PsHeap_AllocWord() {
if len(_psHeap_freelist) > 0 {
// NOTE(review): the freelist is read and deleted at index 0 here, while the rest of
// this file addresses the last element as list[len(list)] (1-based) - confirm which
// index base the list intrinsics use.
__result__ = _psHeap_freelist[0];
delitem(_psHeap_freelist, 0);
if COUNT_REFS {
refStack[__result__] = 1;
}
} else {
// TODO: figure out list syntax
psHeap.append('');
// The new word is the last element, so its address is the new length.
__result__ = len(psHeap);
if COUNT_REFS {
// Appended in step with psHeap so the refcount ends up at the same index
// (assumes both lists have the same length at this point).
refStack.append(1);
}
}
}
/** Note: Doesn't finalize! */
// Release the word at `ptr`: blank its contents, zero its refcount (only when
// COUNT_REFS is set) and push the address onto the freelist for reuse.
// Use PsHeap_DeleteWord when the finalizer must run first.
def PsHeap_FreeWord(number ptr) {
psHeap[ptr] = ''; // Next person can't read what was here
if COUNT_REFS {
refStack[ptr] = 0;
}
_psHeap_freelist.append(ptr);
}
// Finalize the object at `ptr`, then free its word (finalizer runs before the free).
def PsHeap_DeleteWord(number ptr) {
_PsHeap_FinalizeObj(ptr);
PsHeap_FreeWord(ptr);
}

// Reference counts, indexed by heap address (same index as psHeap).
global[] refStack = [];
// Drop one reference to `ptr`; delete the word when its count reaches zero.
// Objects with a negative count are left untouched.
def decref(number ptr) {
// NOTE(review): a count that is already 0 (e.g. set by PsHeap_FreeWord) passes this
// check and would be decremented to -1, which then reads as immortal - confirm intended.
if refStack[ptr] >= 0 { // negative refcount indicates immortal objects
refStack[ptr] -= 1;
if refStack[ptr] == 0 {
PsHeap_DeleteWord(ptr);
}
}
}
// Add one reference to `ptr`, unless its count is negative.
def incref(number ptr) {
if refStack[ptr] >= 0 { // negative refcount indicates immortal objects
refStack[ptr] += 1;
}
}
// implement GC that can do cycles


// Operand stack used by the opcode functions; the top of stack is the last element.
global[] stack = [];
// When set, the heap and opcode functions also maintain refStack; initialised to false.
global COUNT_REFS = false;

// opcodes from python version 3.8:

// opcodes from: https://unpyc.sourceforge.net/Opcodes.html
// and https://harrisonmorgan.dev/2020/04/13/learning-cpython-bytecode-instructions/
// also https://docs.python.org/3.8/library/dis.html#opcode-NOP for description
// and https://github.com/python/cpython/blob/3.8/Lib/opcode.py#L58 for numbers

// NOP opcode: empty body.
def NOP() {}

// POP_TOP opcode: remove the top-of-stack item.
// When COUNT_REFS is set, the item is decref'd before it is removed.
def POP_TOP() {
if COUNT_REFS {
decref(stack[len(stack)]);
}
delitem(stack, len(stack));
}

// ROT_TWO opcode: swap the two top-most stack items.
def ROT_TWO() {
let tos_idx = len(stack);
// Save the top item; the next line overwrites its slot.
let tos0 = stack[tos_idx];
stack[tos_idx] = stack[tos_idx - 1];
stack[tos_idx - 1] = tos0;
}

// ROT_THREE opcode: rotate the top three stack items.
def ROT_THREE() {
// top = top-1
// top-1 = top-2
// top-2 = top
// i.e. move 2nd and 3rd up and move top to 3rd
let tos_idx = len(stack);
// Save the old top first; its slot is overwritten by the shift below.
let tos0 = stack[tos_idx];
stack[tos_idx] = stack[tos_idx - 1];
stack[tos_idx - 1] = stack[tos_idx - 2];
stack[tos_idx - 2] = tos0;
}

// DUP_TOP opcode: push a second copy of the top-of-stack item.
// When COUNT_REFS is set, the item gains one reference for the extra copy.
def DUP_TOP() {
let value = stack[len(stack)];
stack.append(value);
if COUNT_REFS {
incref(value);
}
}

// DUP_TOP_TWO opcode: duplicate the two top-most items, preserving their order.
// When COUNT_REFS is set, each duplicated item gains one reference.
def DUP_TOP_TWO() {
let tos_idx = len(stack);
let tos0 = stack[tos_idx];
let tos1 = stack[tos_idx - 1];
// Push the lower item first so the copies end up in the same order as the originals.
stack.append(tos1);
stack.append(tos0); // keep tos0 at top
if COUNT_REFS {
incref(tos0);
incref(tos1);
}
}

// ROT_FOUR opcode: rotate the top four stack items.
// The 2nd, 3rd and 4th items each move up one slot; the old top goes to the 4th slot.
def ROT_FOUR() {
// tos3 = tos
// tos = tos1
// tos1 = tos2
// tos2 = tos3
let tos_idx = len(stack);
// Save the old top first; its slot is overwritten by the shift below.
let tos0 = stack[tos_idx];
stack[tos_idx] = stack[tos_idx - 1];
stack[tos_idx - 1] = stack[tos_idx - 2];
stack[tos_idx - 2] = stack[tos_idx - 3];
stack[tos_idx - 3] = tos0;
}
Empty file added parser/typecheck/__init__.py
Empty file.
228 changes: 228 additions & 0 deletions parser/typecheck/typecheck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Callable, TypeVar

from parser.astgen.ast_node import (
AstNode, walk_ast, WalkableT, WalkerCallType, AstIdent, AstDeclNode,
AstDefine, VarDeclType, VarDeclScope)
from parser.astgen.astgen import AstGen
from parser.common import BaseLocatedError, StrRegion
from util import flatten_force

WT = TypeVar('WT', bound=WalkableT)
VT = TypeVar('VT')


class FilteredWalker:
    """Walker callback that dispatches to handlers based on the node's type.

    Handlers can be registered for the enter phase, the exit phase, or both.
    Lookup follows the node type's MRO, so a handler registered for a base
    class also runs for instances of its subclasses. All registration methods
    return ``self`` so calls can be chained.
    """

    def __init__(self):
        # Each table maps a type to its handlers, in registration order.
        self.enter_cbs: dict[type[WT] | type, list[Callable[[WT], bool | None]]] = {}
        self.exit_cbs: dict[type[WT] | type, list[Callable[[WT], bool | None]]] = {}
        self.both_cbs: dict[type[WT] | type, list[
            Callable[[WT, WalkerCallType], bool | None]]] = {}

    def register_both(self, t: type[WT], fn: Callable[[WT, WalkerCallType], bool | None]):
        """Register ``fn(node, call_type)`` for both phases of nodes of type ``t``."""
        handlers = self.both_cbs.setdefault(t, [])
        handlers.append(fn)
        return self

    def register_enter(self, t: type[WT], fn: Callable[[WT], bool | None]):
        """Register ``fn(node)`` to run when a node of type ``t`` is entered."""
        handlers = self.enter_cbs.setdefault(t, [])
        handlers.append(fn)
        return self

    def register_exit(self, t: type[WT], fn: Callable[[WT], bool | None]):
        """Register ``fn(node)`` to run when a node of type ``t`` is exited."""
        handlers = self.exit_cbs.setdefault(t, [])
        handlers.append(fn)
        return self

    def __call__(self, o: WalkableT, call_type: WalkerCallType):
        """Run every matching handler for ``o``.

        Phase-specific handlers run before the ones registered for both
        phases. Returns the last truthy handler result, or ``None`` if no
        handler returned a truthy value.
        """
        outcome = None
        if call_type == WalkerCallType.PRE:
            phase_cbs = self.enter_cbs
        else:
            phase_cbs = self.exit_cbs
        node_type = type(o)
        for handler in self._get_funcs(phase_cbs, node_type):
            outcome = handler(o) or outcome
        for handler in self._get_funcs(self.both_cbs, node_type):
            outcome = handler(o, call_type) or outcome
        return outcome

    @classmethod
    def _get_funcs(cls, mapping: dict[type[WT] | type, list[VT]], tp: type[WT]) -> list[VT]:
        """Collect the handlers registered for ``tp`` and each class in its MRO."""
        per_class = (mapping.get(klass, []) for klass in tp.mro())
        return flatten_force(per_class)


@dataclass
class TypeInfo:
    """Common base of all type descriptions; only subclasses may be instantiated."""

    def __post_init__(self):
        # Exact-type check on purpose: instances of subclasses must pass.
        is_bare_base = type(self) is TypeInfo
        assert not is_bare_base, "Cannot instantiate TypeInfo directly,use a subclass"


@dataclass
class ValType(TypeInfo):
    """Type of an ordinary value (neither a list nor a bool)."""


@dataclass
class BoolType(TypeInfo):
    """Type of a boolean value."""


@dataclass
class ListType(TypeInfo):
    """Type of a list."""


@dataclass
class VoidType(TypeInfo):
    """The ``void`` type: marks a place where no value may appear.

    It is, for example, the return type of functions that don't return
    anything (e.g. all regular user-defined scratch functions).
    """


@dataclass
class FunctionType(TypeInfo):
    """Type of a function: its argument types plus its return type."""

    # One entry per parameter.
    arg_types: list[TypeInfo]
    # Type of the value produced by a call.
    ret_type: TypeInfo


@dataclass
class NameInfo:
decl_scope: Scope
ident: str
tp_info: TypeInfo
# node: AstNode # <-- Why do we need this?
is_param: bool = field(default=False, kw_only=True)


@dataclass
class FuncInfo(NameInfo):
    """`NameInfo` for a function: adds its parameter list and its body scope."""

    tp_info: FunctionType  # Narrows the inherited field's type (field order is unchanged)
    params_info: list[ParamInfo]
    # `Scope` is defined further down the module, so it cannot be passed
    # directly as `default_factory`; the lambda defers the lookup.
    subscope: Scope = field(default_factory=lambda: Scope())

    @classmethod
    def from_param_info(
            cls, decl_scope: Scope, ident: str, params_info: list[ParamInfo],
            ret_type: TypeInfo, subscope: Scope | None = None):
        """Alternate constructor that derives the `FunctionType` from the params.

        Args:
            decl_scope: Scope the function is declared in.
            ident: Name of the function.
            params_info: Parameters, in order; their types become the
                function type's `arg_types`.
            ret_type: Return type of the function.
            subscope: Scope of the function body; a fresh `Scope` is created
                when omitted.
        """
        if not subscope:
            subscope = Scope()
        signature = FunctionType([param.tp for param in params_info], ret_type)
        return cls(decl_scope, ident, signature, params_info, subscope)


@dataclass
class ParamInfo:
    """Name and type of a single function parameter."""

    # Parameter name.
    name: str
    # Parameter type.
    tp: TypeInfo


@dataclass
class Scope:
    """Name tables of one scope."""

    # Names declared in this scope.
    declared: dict[str, NameInfo] = field(default_factory=dict)
    # Names used in this scope, mapped to the declaration they refer to.
    # This includes references to outer scopes' variables, so that
    # codegen/type-checker knows what each AstIdent refers to.
    used: dict[str, NameInfo] = field(default_factory=dict)


class NameResolutionError(BaseLocatedError):
    """Located error raised when name resolution fails."""


# The reason `let` isn't used is that we don't want to imply similarity
# between parameters and local variables (where none exists in Scratch).
# Also, we might want to use `let` later as a modifier to bind it to
# an actual local var.
# Don't need to `sys.intern` these manually as Python automatically does
# this for literals.
PARAM_TYPES = {'number', 'string', 'val', 'bool'}


# Variables:
# - We can prevent usages before the variable is declared in 2 ways:
# - Based on time: very sensible, like JS, but requires too many runtime features
# - Based on location: somewhat makes sense except for inner functions -
# they may be called later so should be able to access any variables.
# - Or we can just ignore it (e.g. `var` in JS) and pretend everything was
# declared at the top (but not assigned to - i.e. hoist `var foo;` to top).
# To minimise accidental errors, option 1.2 is best
# (errors shouldn't pass silently, and that method requires no special runtime)
class NameResolver:
    """Bind every identifier in a program to the declaration it refers to.

    The result is a tree of `Scope` objects: the top-level scope returned by
    `run()`, plus one sub-scope per function (stored on its `FuncInfo`).

    Rules implemented here:
      - A variable can only be used after its declaration has been walked
        (location-based ordering within a block).
      - Function bodies are resolved only after the whole enclosing block has
        been walked, so a function can use every name of the enclosing
        scopes, including ones declared below it.
      - Declarations whose scope is not `VarDeclScope.LET` are always placed
        in the top-level scope.
    """

    def __init__(self, astgen: AstGen):
        self.astgen = astgen
        self.src = self.astgen.src
        self.top_scope: Scope | None = None  # Set by the first `run()`

    def _init(self):
        """Parse the source and create the (still empty) top-level scope."""
        self.ast = self.astgen.parse()
        self.top_scope = Scope()

    def run(self):
        """Resolve the whole program and return its top-level scope.

        The result is cached: later calls return the same `Scope` object
        without parsing or resolving again.

        Raises:
            NameResolutionError: on an undefined name, a duplicate
                declaration or an unknown parameter type.
        """
        if self.top_scope is not None:
            return self.top_scope
        self._init()
        self.run_on_new_scope(self.ast.statements, curr_scope=self.top_scope)
        return self.top_scope

    def run_on_new_scope(self, block: list[AstNode],
                         parent_scopes: list[Scope] | None = None,
                         curr_scope: Scope | None = None):
        """Resolve the names in `block`, treating it as its own scope.

        Args:
            block: Statements making up the scope's body.
            parent_scopes: Enclosing scopes, outermost first. The list is
                used as a stack: the current scope is pushed for the duration
                of the call and popped again before returning.
            curr_scope: Scope to fill in (e.g. one pre-populated with
                parameters); a fresh `Scope` is created when omitted.

        Returns:
            The scope of `block`.

        Raises:
            NameResolutionError: on an undefined name, a duplicate
                declaration or an unknown parameter type.
        """

        def enter_ident(n: AstIdent):
            # Innermost scope first, so inner declarations shadow outer ones.
            for s in reversed(scope_stack):
                if info := s.declared.get(n.id):
                    curr_scope.used[n.id] = info
                    return
            raise self.err(f"Name '{n.id}' is not defined", n.region)

        def enter_decl(n: AstDeclNode):
            # Need semi-special logic here to prevent the walker from
            # visiting the AstIdent that is currently being declared.
            AstNode.walk_obj(n.value, walker)  # Don't walk `n.ident`
            # Do this after walking (that is when the name is bound)
            ident = n.ident.id
            target_scope = curr_scope if n.scope == VarDeclScope.LET else self.top_scope
            if ident in target_scope.declared:
                raise self.err("Variable already declared", n.region)
            target_scope.declared[ident] = NameInfo(target_scope, ident, (
                ValType() if n.type == VarDeclType.VARIABLE else ListType()))
            return True

        def enter_fn_decl(fn: AstDefine):
            ident = fn.ident.id
            if ident in curr_scope.declared:
                raise self.err("Function already declared", fn.ident.region)
            subscope = Scope()
            params: list[ParamInfo] = []
            for tp_ident, param in fn.params:
                if tp_ident.id not in PARAM_TYPES:
                    raise self.err("Unknown parameter type", tp_ident.region)
                if param.id in subscope.declared:
                    raise self.err("There is already a parameter of this name", param.region)
                # Decide on the declared *type* name, not on the parameter's
                # own name: `bool x` is a bool, `number bool` is not.
                param_tp = BoolType() if tp_ident.id == 'bool' else ValType()
                subscope.declared[param.id] = NameInfo(
                    subscope, param.id, param_tp, is_param=True)
                params.append(ParamInfo(param.id, param_tp))
            curr_scope.declared[ident] = info = FuncInfo.from_param_info(
                curr_scope, ident, params,
                ret_type=VoidType(), subscope=subscope)
            inner_funcs.append((info, fn))  # Store funcs for later walking
            # Skip walking body, only walk inner after collecting all declared
            # variables in outer scope (so function can use all variables
            # declared in outer scope - even the ones declared below it)
            return True

        curr_scope = curr_scope or Scope()
        scope_stack = parent_scopes or []
        scope_stack.append(curr_scope)
        inner_funcs: list[tuple[FuncInfo, AstDefine]] = []
        # Walk self
        walker = (FilteredWalker()
                  .register_enter(AstIdent, enter_ident)
                  .register_enter(AstDeclNode, enter_decl)
                  .register_enter(AstDefine, enter_fn_decl))
        walk_ast(block, walker)
        # Walk sub-functions
        for fn_info, fn_decl in inner_funcs:
            fn_info.subscope = self.run_on_new_scope(
                fn_decl.body, scope_stack, fn_info.subscope)
        return scope_stack.pop()  # Remove current scope from stack & return it

    def err(self, msg: str, region: StrRegion):
        """Build (not raise) a `NameResolutionError` for `region` of the source."""
        return NameResolutionError(msg, region, self.src)
13 changes: 13 additions & 0 deletions test/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
from parser.cst.cstgen import CstGen, LocatedCstError
from parser.lexer import Tokenizer
from parser.lexer.tokens import Token, OpToken
from parser.typecheck.typecheck import Scope, NameResolver, NameResolutionError
from test.common.snapshottest import SnapshotTestCase
from test.common.utils import TestCaseUtils
from util.pformat import pformat


def _strict_boundary_kwargs():
Expand Down Expand Up @@ -53,6 +55,7 @@ def setUpClass(cls) -> None:
cls.format_dispatch.setdefault(Leaf, cls._tree_format)
cls.format_dispatch.setdefault(Node, cls._tree_format)
cls.format_dispatch.setdefault(AstNode, cls._tree_format)
cls.format_dispatch.setdefault(Scope, pformat)
super().setUpClass()

@classmethod
Expand Down Expand Up @@ -124,3 +127,13 @@ def assertFailsGracefullyAST(self, src: str):
with self.assertRaises(LocatedAstError) as ctx:
a.parse()
return ctx.exception

# noinspection PyMethodMayBeStatic
def getNameResolver(self, src: str):
    """Build a `NameResolver` for `src` via Tokenizer -> CstGen -> AstGen."""
    tokenizer = Tokenizer(src)
    cst_gen = CstGen(tokenizer)
    ast_gen = AstGen(cst_gen)
    return NameResolver(ast_gen)

def assertNameResolveError(self, src: str):
    """Assert that resolving names in `src` raises `NameResolutionError`.

    Returns the raised exception so callers can inspect it further.
    """
    resolver = self.getNameResolver(src)
    with self.assertRaises(NameResolutionError) as raised:
        resolver.run()
    return raised.exception
Loading