Skip to content

Commit a224282

Browse files
committed
feat: comments, boolean and delimiters regex
1 parent 79a2160 commit a224282

File tree

2 files changed

+147
-9
lines changed

2 files changed

+147
-9
lines changed

src/PyReprism/languages/javascript.py

+77-5
Original file line number | Diff line number | Diff line change
@@ -8,34 +8,99 @@ def __init__():
88

99
@staticmethod
1010
def file_extension() -> str:
11+
"""
12+
Return the file extension used for JavaScript files.
13+
14+
:return: The file extension for JavaScript files.
15+
:rtype: str
16+
"""
1117
return extension.javascript
1218

1319
@staticmethod
1420
def keywords() -> list:
21+
"""
22+
Return a list of JavaScript reserved words and contextual keywords.
23+
24+
:return: A list of JavaScript reserved words and contextual keywords (no built-in function names are included).
25+
:rtype: list
26+
"""
1527
keyword = 'as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield'.split('|')
1628
return keyword
1729

1830
@staticmethod
19-
def comment_regex():
31+
def comment_regex() -> re.Pattern:
32+
"""
33+
Compile and return a regular expression pattern to identify different types of comments and non-comment code in JavaScript source files.
34+
35+
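:return: A compiled regex pattern with two named groups: `comment` (single-line comments, block comments, unterminated or dangling block-comment fragments, and also runs of braces `{}`) and `noncomment` (quoted string literals and any other code).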
36+
:rtype: re.Pattern
37+
"""
2038
pattern = re.compile(r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)', re.DOTALL | re.MULTILINE)
2139
return pattern
2240

2341
@staticmethod
24-
def number_regex():
42+
def number_regex() -> re.Pattern:
43+
"""
44+
Compile and return a regular expression pattern to identify numeric literals in JavaScript code.
45+
46+
:return: A compiled regex pattern to match JavaScript numeric literals: hexadecimal, binary, and octal integers, `NaN`, `Infinity`, and decimal numbers with an optional exponent.
47+
:rtype: re.Pattern
48+
"""
2549
pattern = re.compile(r'\b(?:0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|NaN|Infinity)\b|(?:\b\d+\.?\d*|\B\.\d+)(?:[Ee][+-]?\d+)?')
2650
return pattern
2751

2852
@staticmethod
29-
def operator_regex():
53+
def operator_regex() -> re.Pattern:
54+
"""
55+
Compile and return a regular expression pattern to identify JavaScript operators.
56+
57+
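:return: A compiled regex pattern to match JavaScript operator symbols (arithmetic, comparison, assignment, bitwise, logical, arrow `=>`, ternary `?`, and spread `...`); keyword operators such as `typeof` are not matched.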
58+
:rtype: re.Pattern
59+
"""
3060
pattern = re.compile(r'-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}')
3161
return pattern
3262

3363
@staticmethod
34-
def keywords_regex():
64+
def delimiters_regex() -> re.Pattern:
65+
"""
66+
Compile and return a regular expression pattern to identify JavaScript delimiters.
67+
68+
This function generates a regular expression that matches JavaScript delimiters, including parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, at symbols `@`, as well as angle brackets `<` and `>`.
69+
70+
:return: A compiled regex pattern to match JavaScript delimiters.
71+
:rtype: re.Pattern
72+
"""
73+
return re.compile(r'[()\[\]{}.,:;@<>]')
74+
75+
@staticmethod
76+
def keywords_regex() -> re.Pattern:
77+
"""
78+
Compile and return a regular expression pattern to identify JavaScript keywords.
79+
80+
:return: A compiled regex pattern to match JavaScript keywords.
81+
:rtype: re.Pattern
82+
"""
3583
return re.compile(r'\b(' + '|'.join(JavaScript.keywords()) + r')\b')
3684

85+
@staticmethod
86+
def boolean_regex() -> re.Pattern:
87+
"""
88+
Compile and return a regular expression pattern to identify JavaScript boolean literals.
89+
90+
:return: A compiled regex pattern to match JavaScript boolean literals.
91+
:rtype: re.Pattern
92+
"""
93+
return re.compile(r'\b(?:true|false)\b')
94+
3795
@staticmethod
3896
def remove_comments(source_code: str, isList: bool = False) -> str:
97+
"""
98+
Remove comments from the provided JavaScript source code string.
99+
100+
:param str source_code: The JavaScript source code from which to remove comments.
101+
:return: The source code with all comments removed.
102+
:rtype: str
103+
"""
39104
result = []
40105
for match in JavaScript.comment_regex().finditer(source_code):
41106
if match.group('noncomment'):
@@ -45,5 +110,12 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
45110
return ''.join(result)
46111

47112
@staticmethod
48-
def remove_keywords(source: str):
113+
def remove_keywords(source: str) -> str:
114+
"""
115+
Remove all JavaScript keywords from the provided source code string.
116+
117+
:param str source: The source code string from which to remove JavaScript keywords.
118+
:return: The source code string with all JavaScript keywords removed.
119+
:rtype: str
120+
"""
49121
return re.sub(re.compile(JavaScript.keywords_regex()), '', source)

src/PyReprism/languages/jsx.py

+70-4
Original file line number | Diff line number | Diff line change
@@ -8,34 +8,93 @@ def __init__():
88

99
@staticmethod
1010
def file_extension() -> str:
11+
"""
12+
Return the file extension used for JSX files.
13+
14+
:return: The file extension for JSX files.
15+
:rtype: str
16+
"""
1117
return extension.jsx
1218

1319
@staticmethod
1420
def keywords() -> list:
21+
"""
22+
Return a list of JSX reserved words and contextual keywords.
23+
24+
:return: A list of JSX reserved words and contextual keywords (no built-in function names are included).
25+
:rtype: list
26+
"""
1527
keyword = 'as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|var|void|while|with|yield'.split('|')
1628
return keyword
1729

1830
@staticmethod
19-
def comment_regex():
31+
def comment_regex() -> re.Pattern:
32+
"""
33+
Compile and return a regular expression pattern to identify different types of comments and non-comment code in JSX source files.
34+
35+
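:return: A compiled regex pattern with two named groups: `comment` (single-line comments, block comments, JSX expression comments of the form `{/* ... */}`, and unterminated or dangling block-comment fragments) and `noncomment` (quoted string literals and any other code).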
36+
:rtype: re.Pattern
37+
"""
2038
pattern = re.compile(r'(?P<comment>//.*?$|/\*[\s\S]*?\*/|\{/\*[\s\S]*?\*/\}|/\*.*?$|^.*?\*/|/\*\*[\s\S]*?\*/)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)', re.DOTALL | re.MULTILINE)
2139
return pattern
2240

2341
@staticmethod
24-
def number_regex():
42+
def number_regex() -> re.Pattern:
43+
"""
44+
Compile and return a regular expression pattern to identify numeric literals in JSX code.
45+
46+
:return: A compiled regex pattern to match JSX numeric literals: hexadecimal, binary, and octal integers, `NaN`, `Infinity`, and decimal numbers with an optional exponent.
47+
:rtype: re.Pattern
48+
"""
2549
pattern = re.compile(r'\b(?:0[xX][\dA-Fa-f]+|0[bB][01]+|0[oO][0-7]+|NaN|Infinity)\b|(?:\b\d+\.?\d*|\B\.\d+)(?:[Ee][+-]?\d+)?')
2650
return pattern
2751

2852
@staticmethod
29-
def operator_regex():
53+
def operator_regex() -> re.Pattern:
54+
"""
55+
Compile and return a regular expression pattern to identify JSX operators.
56+
57+
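:return: A compiled regex pattern to match JSX operator symbols (arithmetic, comparison, assignment, bitwise, logical, arrow `=>`, ternary `?`, and spread `...`); keyword operators such as `typeof` are not matched.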
58+
:rtype: re.Pattern
59+
"""
3060
pattern = re.compile(r'-[-=]?|\+[+=]?|!=?=?|<<?=?|>>?>?=?|=(?:==?|>)?|&[&=]?|\|[|=]?|\*\*?=?|\/=?|~|\^=?|%=?|\?|\.{3}')
3161
return pattern
3262

3363
@staticmethod
3464
def keywords_regex():
3565
return re.compile(r'\b(' + '|'.join(Jsx.keywords()) + r')\b')
3666

67+
@staticmethod
68+
def delimiters_regex() -> re.Pattern:
69+
"""
70+
Compile and return a regular expression pattern to identify JSX delimiters.
71+
72+
This function generates a regular expression that matches JSX delimiters, including parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, at symbols `@`, as well as angle brackets `<` and `>`.
73+
74+
:return: A compiled regex pattern to match JSX delimiters.
75+
:rtype: re.Pattern
76+
"""
77+
return re.compile(r'[()\[\]{}.,:;@<>]')
78+
79+
@staticmethod
80+
def boolean_regex() -> re.Pattern:
81+
"""
82+
Compile and return a regular expression pattern to identify JSX boolean literals.
83+
84+
:return: A compiled regex pattern to match the JSX literals `true`, `false`, and `null` as whole words.
85+
:rtype: re.Pattern
86+
"""
87+
return re.compile(r'\b(?:true|false|null)\b')
88+
3789
@staticmethod
3890
def remove_comments(source_code: str, isList: bool = False) -> str:
91+
"""
92+
Remove comments from the provided JSX source code string.
93+
94+
:param str source_code: The JSX source code from which to remove comments.
95+
:return: The source code with all comments removed.
96+
:rtype: str
97+
"""
3998
result = []
4099
for match in Jsx.comment_regex().finditer(source_code):
41100
if match.group('noncomment'):
@@ -45,5 +104,12 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
45104
return ''.join(result)
46105

47106
@staticmethod
48-
def remove_keywords(source: str):
107+
def remove_keywords(source: str) -> str:
108+
"""
109+
Remove all JSX keywords from the provided source code string.
110+
111+
:param str source: The source code string from which to remove JSX keywords.
112+
:return: The source code string with all JSX keywords removed.
113+
:rtype: str
114+
"""
49115
return re.sub(re.compile(Jsx.keywords_regex()), '', source)

0 commit comments

Comments
 (0)