Skip to content

Commit 3baab4e

Browse files
committed
feat: comments, boolean and delimiters regex
1 parent a224282 commit 3baab4e

File tree

1 file changed

+80
-5
lines changed

1 file changed

+80
-5
lines changed

src/PyReprism/languages/php.py

+80-5
Original file line numberDiff line numberDiff line change
@@ -8,34 +8,102 @@ def __init__():
88

99
@staticmethod
1010
def file_extension() -> str:
11+
"""
12+
Return the file extension used for PHP files.
13+
14+
:return: The file extension for PHP files.
15+
:rtype: str
16+
"""
1117
return extension.php
1218

1319
@staticmethod
1420
def keywords() -> list:
21+
"""
22+
Return a list of PHP keywords and built-in functions.
23+
24+
:return: A list of PHP keywords and built-in function names.
25+
:rtype: list
26+
"""
1527
keyword = 'and|or|xor|array|as|break|case|cfunction|class|const|continue|declare|default|die|do|else|elseif|enddeclare|endfor|endforeach|endif|endswitch|endwhile|extends|for|foreach|function|include|include_once|global|if|new|return|static|switch|use|require|require_once|var|while|abstract|interface|public|implements|private|protected|parent|throw|null|echo|print|trait|namespace|final|yield|goto|instanceof|finally|try|catch'.split('|')
1628
return keyword
1729

1830
@staticmethod
19-
def comment_regex():
31+
def comment_regex() -> re.Pattern:
32+
"""
33+
Compile and return a regular expression pattern to identify different types of comments and non-comment code in PHP source files.
34+
35+
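:return: A compiled regex pattern with three named groups: `comment` (single-line `#` and `//` comments, and also runs of `{` / `}` braces), `multilinecomment` (`/* ... */` blocks), and `noncomment` (quoted strings and all other code).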
36+
:rtype: re.Pattern
37+
"""
2038
pattern = re.compile(r'(?P<comment>#.*?$|//.*?$|[{}]+)|(?P<multilinecomment>/\*.*?\*/)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^#/\'"{}]*)', re.DOTALL | re.MULTILINE)
2139
return pattern
2240

2341
@staticmethod
24-
def number_regex():
42+
def number_regex() -> re.Pattern:
43+
"""
44+
Compile and return a regular expression pattern to identify numeric literals in PHP code.
45+
46+
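:return: A compiled regex pattern to match numeric literals: integers (including `0x`, `0b`, and `0o` prefixed forms) and floats with an optional exponent. The pattern also accepts a trailing `j` suffix, although PHP has no complex-number literal.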
47+
:rtype: re.Pattern
48+
"""
2549
pattern = re.compile(r'(?:\b(?=\d)|\B(?=\.))(?:0[bo])?(?:(?:\d|0x[\da-f])[\da-f]*\.?\d*|\.\d+)(?:e[+-]?\d+)?j?\b', re.IGNORECASE)
2650
return pattern
2751

2852
@staticmethod
29-
def operator_regex():
53+
def operator_regex() -> re.Pattern:
54+
"""
55+
Compile and return a regular expression pattern to identify PHP operators.
56+
57+
:return: A compiled regex pattern to match arithmetic, assignment, comparison, shift, and bitwise operator symbols, plus the word operators `or`, `and`, and `not`.
58+
:rtype: re.Pattern
59+
"""
3060
pattern = re.compile(r'[-+%=]=?|!=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]|\b(?:or|and|not)\b')
3161
return pattern
3262

3363
@staticmethod
34-
def keywords_regex():
64+
def keywords_regex() -> re.Pattern:
65+
"""
66+
Compile and return a regular expression pattern to identify PHP keywords.
67+
68+
:return: A compiled regex pattern to match PHP keywords.
69+
:rtype: re.Pattern
70+
"""
3571
return re.compile(r'\b(' + '|'.join(PHP.keywords()) + r')\b', re.IGNORECASE)
3672

73+
@staticmethod
74+
def delimiters_regex() -> re.Pattern:
75+
"""
76+
Compile and return a regular expression pattern to identify PHP delimiters.
77+
78+
This function generates a regular expression that matches PHP delimiters, including parentheses `()`, brackets `[]`, braces `{}`, commas `,`, colons `:`, periods `.`, semicolons `;`, at symbols `@`, angle brackets `<` and `>`, as well as PHP-specific tokens like `$` for variables.
79+
80+
:return: A compiled regex pattern to match PHP delimiters.
81+
:rtype: re.Pattern
82+
"""
83+
return re.compile(r'[()\[\]{}.,:;@<>$]')
84+
85+
@staticmethod
86+
def boolean_regex() -> re.Pattern:
87+
"""
88+
Compile and return a regular expression pattern to identify PHP boolean literals.
89+
90+
This function generates a regular expression that matches the PHP boolean literals `true`, `false`, and the special constant `null`. The matching is case-insensitive, as PHP boolean literals are not case-sensitive.
91+
92+
:return: A compiled regex pattern to match PHP boolean literals and `null`.
93+
:rtype: re.Pattern
94+
"""
95+
return re.compile(r'\b(?:true|false|null)\b', re.IGNORECASE)
96+
3797
@staticmethod
3898
def remove_comments(source_code: str, isList: bool = False) -> str:
99+
"""
100+
Remove comments from the provided PHP source code string.
101+
102+
:param str source_code: The PHP source code from which to remove comments.
103+
:return: The source code with all comments removed.
104+
:rtype: str
105+
"""
106+
return PHP.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
39107
result = []
40108
for match in PHP.comment_regex().finditer(source_code):
41109
if match.group('noncomment'):
@@ -45,5 +113,12 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
45113
return ''.join(result)
46114

47115
@staticmethod
48-
def remove_keywords(source: str):
116+
def remove_keywords(source: str) -> str:
117+
"""
118+
Remove all PHP keywords from the provided source code string.
119+
120+
:param str source: The source code string from which to remove PHP keywords.
121+
:return: The source code string with all PHP keywords removed.
122+
:rtype: str
123+
"""
49124
return re.sub(re.compile(PHP.keywords_regex()), '', source)

0 commit comments

Comments
 (0)