Skip to content

Commit b2fcbe9

Browse files
committed
feat: comments, boolean and delimiters regex
1 parent 00876bd commit b2fcbe9

File tree

4 files changed

+47
-12
lines changed

4 files changed

+47
-12
lines changed

src/PyReprism/languages/cpp.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def keywords() -> list:
2828
return keyword
2929

3030
@staticmethod
31-
def comment_regex():
31+
def comment_regex() -> re.Pattern:
3232
"""
3333
Compile and return a regular expression pattern to identify different types of comments and non-comment code in C++ source files.
3434
@@ -39,7 +39,7 @@ def comment_regex():
3939
return pattern
4040

4141
@staticmethod
42-
def number_regex():
42+
def number_regex() -> re.Pattern:
4343
"""
4444
Compile and return a regular expression pattern to identify numeric literals in C++ code.
4545
@@ -50,7 +50,7 @@ def number_regex():
5050
return pattern
5151

5252
@staticmethod
53-
def operator_regex():
53+
def operator_regex() -> re.Pattern:
5454
"""
5555
Compile and return a regular expression pattern to identify C++ operators.
5656
@@ -61,7 +61,7 @@ def operator_regex():
6161
return pattern
6262

6363
@staticmethod
64-
def keywords_regex():
64+
def keywords_regex() -> re.Pattern:
6565
"""
6666
Compile and return a regular expression pattern to identify C++ keywords and built-in functions.
6767
@@ -102,7 +102,7 @@ def remove_comments(source_code: str) -> str:
102102
return CPP.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
103103

104104
@staticmethod
105-
def remove_keywords(source: str):
105+
def remove_keywords(source: str) -> str:
106106
"""
107107
Remove all C++ keywords from the provided source code string.
108108

src/PyReprism/languages/python.py

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33

44

55
class Python:
6+
"""
7+
This is the class for processing Python source code
8+
"""
69
def __init__(self):
710
pass
811

src/PyReprism/languages/scala.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,58 @@ def keywords() -> list:
1616
return keyword
1717

1818
@staticmethod
19-
def comment_regex():
19+
def comment_regex() -> re.Pattern:
2020
pattern = re.compile(r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)', re.DOTALL | re.MULTILINE)
2121
return pattern
2222

2323
@staticmethod
24-
def number_regex():
24+
def number_regex() -> re.Pattern:
2525
pattern = re.compile(r'\b0x[\da-f]*\.?[\da-f]+|(?:\b\d+\.?\d*|\B\.\d+)(?:e\d+)?[dfl]?')
2626
return pattern
2727

2828
@staticmethod
29-
def operator_regex():
29+
def operator_regex() -> re.Pattern:
3030
pattern = re.compile(r'(^|[^.])(?:\+[+=]?|-[-=]?|!=?|<<?=?|>>?>?=?|==?|&[&=]?|\|[|=]?|\*=?|\/=?|%=?|\^=?|[?:~])')
3131
return pattern
3232

3333
@staticmethod
34-
def keywords_regex():
34+
def keywords_regex() -> re.Pattern:
3535
return re.compile(r'\b(' + '|'.join(Scala.keywords()) + r')\b')
3636

37+
@staticmethod
38+
def boolean_regex() -> re.Pattern:
39+
"""
40+
Compile and return a regular expression pattern to identify Scala boolean literals.
41+
42+
This function generates a regular expression that matches the Scala boolean literals `true`, `false`, and the special constant `null`.
43+
44+
:return: A compiled regex pattern to match Scala boolean literals and `null`.
45+
:rtype: re.Pattern
46+
"""
47+
return re.compile(r'\b(?:true|false|null)\b')
48+
49+
@staticmethod
50+
def delimiters_regex() -> re.Pattern:
51+
"""
52+
Compile and return a regular expression pattern to identify Scala language delimiters.
53+
54+
This function generates a regular expression that matches Scala language delimiters, which include parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, angle brackets `<`, `>`, the question mark `?`, and the underscore `_`.
55+
56+
:return: A compiled regex pattern to match Scala delimiters.
57+
:rtype: re.Pattern
58+
"""
59+
return re.compile(r'[()\[\]{}.,:;<>?_]')
60+
3761
@staticmethod
3862
def remove_comments(source_code: str, isList: bool = False) -> str:
63+
"""
64+
Remove comments from the provided Scala source code string.
65+
66+
:param str source_code: The Scala source code from which to remove comments.
67+
:return: The source code with all comments removed.
68+
:rtype: str
69+
"""
70+
return Scala.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
3971
result = []
4072
for match in Scala.comment_regex().finditer(source_code):
4173
if match.group('noncomment'):
@@ -45,5 +77,5 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
4577
return ''.join(result)
4678

4779
@staticmethod
48-
def remove_keywords(source: str):
80+
def remove_keywords(source: str) -> str:
4981
return re.sub(re.compile(Scala.keywords_regex()), '', source)

src/PyReprism/utils/normalizer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ def __init__():
66
pass
77

88
@staticmethod
9-
def whitespaces_regex():
9+
def whitespaces_regex() -> re.Pattern:
1010
return re.compile(r'[\t\x0b\x0c\r ]+|^\s*\n', re.MULTILINE)
1111

1212
@staticmethod
13-
def remove_whitespaces(source: str):
13+
def remove_whitespaces(source: str) -> str:
1414
pattern = re.sub(Normalizer.whitespaces_regex(), '', source)
1515
return pattern.strip()

0 commit comments

Comments
 (0)