Skip to content

Commit 7a42fea

Browse files
committed
feat: boolean and delimiters regex, comments
1 parent b316c33 commit 7a42fea

File tree

2 files changed

+72
-2
lines changed

2 files changed

+72
-2
lines changed

src/PyReprism/languages/c.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def keywords() -> list:
3030
@staticmethod
3131
def comment_regex() -> re.Pattern:
3232
"""
33-
Compile and return a regular expression pattern to identify different types of comments and non-comment code in Python source files.
33+
Compile and return a regular expression pattern to identify different types of comments and non-comment code in C source files.
3434
3535
:return: A compiled regex pattern with named groups to match single-line comments, multiline comments, and non-comment code elements.
3636
:rtype: re.Pattern
@@ -104,7 +104,7 @@ def remove_comments(source_code: str) -> str:
104104
@staticmethod
105105
def remove_keywords(source: str) -> str:
106106
"""
107-
Remove all Python keywords from the provided source code string.
107+
Remove all C keywords from the provided source code string.
108108
109109
:param str source: The source code string from which to remove C keywords.
110110
:return: The source code string with all C keywords removed.

src/PyReprism/languages/cpp.py

+70
Original file line numberDiff line numberDiff line change
@@ -8,36 +8,106 @@ def __init__(self):
88

99
@staticmethod
1010
def file_extension() -> str:
11+
"""
12+
Return the file extension used for C++ files.
13+
14+
:return: The file extension for C++ files.
15+
:rtype: str
16+
"""
1117
return extension.cpp
1218

1319
@staticmethod
1420
def keywords() -> list:
21+
"""
22+
Return a list of C++ keywords and built-in functions.
23+
24+
:return: A list of C++ keywords and built-in function names.
25+
:rtype: list
26+
"""
1527
keyword = 'alignas|alignof|asm|auto|bool|break|case|catch|char|char16_t|char32_t|class|compl|const|constexpr|const_cast|continue|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|float|for|friend|goto|if|inline|int|int8_t|int16_t|int32_t|int64_t|uint8_t|uint16_t|uint32_t|uint64_t|long|mutable|namespace|new|noexcept|nullptr|operator|private|protected|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|union|unsigned|using|virtual|void|volatile|wchar_t|while|true|false'.split('|')
1628
return keyword
1729

1830
@staticmethod
1931
def comment_regex():
32+
"""
33+
Compile and return a regular expression pattern to identify different types of comments and non-comment code in C++ source files.
34+
35+
:return: A compiled regex pattern with named groups to match single-line comments, multiline comments, and non-comment code elements.
36+
:rtype: re.Pattern
37+
"""
2038
pattern = re.compile(r'(?P<comment>//.*?$|/\*[^*]*\*+(?:[^/*][^*]*\*+)*?/)|(?P<noncomment>[^/]+)', re.DOTALL | re.MULTILINE)
2139
return pattern
2240

2341
@staticmethod
2442
def number_regex():
43+
"""
44+
Compile and return a regular expression pattern to identify numeric literals in C++ code.
45+
46+
:return: A compiled regex pattern to match C++ numeric literals, including hexadecimal integers, decimal integers, and floating-point numbers with an optional exponent and optional ``f``/``u``/``l`` suffixes.
47+
:rtype: re.Pattern
48+
"""
2549
pattern = re.compile(r'(?:\b0x[\da-f]+|(?:\b\d+\.?\d*|\B\.\d+)(?:e[+-]?\d+)?)[ful]*')
2650
return pattern
2751

2852
@staticmethod
2953
def operator_regex():
54+
"""
55+
Compile and return a regular expression pattern to identify C++ operators.
56+
57+
:return: A compiled regex pattern to match various C++ operators and logical keywords.
58+
:rtype: re.Pattern
59+
"""
3060
pattern = re.compile(r'--?|\+\+?|!=?|<{1,2}=?|>{1,2}=?|->|:{1,2}|={1,2}|\^|~|%|&{1,2}|\|\|?|\?|\*|\/|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b')
3161
return pattern
3262

3363
@staticmethod
3464
def keywords_regex():
65+
"""
66+
Compile and return a regular expression pattern that matches C++ keywords as whole words.
67+
68+
:return: A compiled regex pattern that matches any entry of ``CPP.keywords()`` delimited by word boundaries.
69+
:rtype: re.Pattern
70+
"""
3571
return re.compile(r'\b(' + '|'.join(CPP.keywords()) + r')\b')
3672

73+
@staticmethod
74+
def boolean_regex() -> re.Pattern:
75+
"""
76+
Compile and return a regular expression pattern to identify C++ boolean literals.
77+
78+
:return: A compiled regex pattern to match C++ boolean literals.
79+
:rtype: re.Pattern
80+
"""
81+
return re.compile(r'\b(?:true|false)\b')
82+
83+
@staticmethod
84+
def delimiters_regex() -> re.Pattern:
85+
"""
86+
Compile and return a regular expression pattern to identify C and C++ delimiters.
87+
88+
:return: A compiled regex pattern to match C and C++ delimiters.
89+
:rtype: re.Pattern
90+
"""
91+
return re.compile(r'[()\[\]{}.,:;@<>*&]')
92+
3793
@staticmethod
3894
def remove_comments(source_code: str) -> str:
95+
"""
96+
Remove comments from the provided C++ source code string.
97+
98+
:param str source_code: The C++ source code from which to remove comments.
99+
:return: The source code with all comments removed.
100+
:rtype: str
101+
"""
39102
return CPP.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
40103

41104
@staticmethod
42105
def remove_keywords(source: str):
106+
"""
107+
Remove all C++ keywords from the provided source code string.
108+
109+
:param str source: The source code string from which to remove C++ keywords.
110+
:return: The source code string with all C++ keywords removed.
111+
:rtype: str
112+
"""
43113
return re.sub(re.compile(CPP.keywords_regex()), '', source)

0 commit comments

Comments
 (0)