Skip to content

Commit d812f3f

Browse files
committed
feat: comments, boolean and delimiters regex
1 parent 536238d commit d812f3f

File tree

2 files changed

+79
-15
lines changed

2 files changed

+79
-15
lines changed

src/PyReprism/languages/kotlin.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,58 @@ def keywords() -> list:
1616
return keyword
1717

1818
@staticmethod
19-
def comment_regex():
19+
def comment_regex() -> re.Pattern:
    """
    Compile and return the pattern used to separate Kotlin comments from code.

    Each match fills exactly one of two named groups: ``comment`` (line
    comments, block comments, an unterminated ``/*`` up to end of line, or
    text from a line start up to ``*/``) or ``noncomment`` (quoted literals
    and any other run of text).

    NOTE(review): the ``comment`` group also matches runs of ``{`` / ``}``,
    so braces are classified as comments -- confirm this is intended.

    :return: A compiled regex with ``comment`` and ``noncomment`` groups.
    :rtype: re.Pattern
    """
    comment_part = r'(?P<comment>//.*?$|/\*.*?\*/|/\*.*?$|^.*?\*/|[{}]+)'
    code_part = r'(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"{}]*)'
    return re.compile(comment_part + '|' + code_part, re.MULTILINE | re.DOTALL)
2222

2323
@staticmethod
24-
def number_regex():
24+
def number_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin numeric literals.

    The pattern matches, on word boundaries, either a ``0b``/``0x`` prefix
    followed by digits or hex letters, or a decimal number with an optional
    fraction, an optional lowercase ``e`` exponent and an optional
    ``f``/``F``/``L`` suffix.

    :return: A compiled regex pattern to match Kotlin numbers.
    :rtype: re.Pattern
    """
    return re.compile(r'\b(?:0[bx][\da-fA-F]+|\d+(?:\.\d+)?(?:e[+-]?\d+)?[fFL]?)\b')
2727

2828
@staticmethod
29-
def operator_regex():
29+
def operator_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin operators.

    The pattern covers arithmetic and assignment forms (``+``, ``++``, ``+=``,
    ``-``, ``--``, ``-=``, ``->``), equality and negation (``=``, ``==``,
    ``===``, ``!``, ``!!``, ``!=``, ``!==``), ``/ * % < >`` with an optional
    trailing ``=``, ``?``/``:`` forms such as ``?:`` and ``::``, the range
    operator ``..``, ``&&``, ``||`` and the infix word operators
    ``and``, ``inv``, ``or``, ``shl``, ``shr``, ``ushr``, ``xor``.

    :return: A compiled regex pattern to match Kotlin operators.
    :rtype: re.Pattern
    """
    # The leading '+' must be escaped: a bare '+' at the start of a pattern is
    # a quantifier with nothing to repeat and makes re.compile raise re.error.
    return re.compile(
        r'\+[+=]?|-[-=>]?|==?=?|!(?:!|==?)?|[\/*%<>]=?|[?:]:?|\.\.|&&|\|\||'
        r'\b(?:and|inv|or|shl|shr|ushr|xor)\b'
    )
3232

3333
@staticmethod
34-
def keywords_regex():
34+
def keywords_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin keywords.

    The entries returned by ``Kotlin.keywords()`` are joined with ``|`` into a
    single capturing group wrapped in word boundaries.

    :return: A compiled regex pattern to match Kotlin keywords.
    :rtype: re.Pattern
    """
    alternation = '|'.join(Kotlin.keywords())
    return re.compile(r'\b(' + alternation + r')\b')
3636

37+
@staticmethod
38+
def delimiters_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin language delimiters.

    Each match is a single character from this set: parentheses ``()``,
    brackets ``[]``, braces ``{}``, period ``.``, comma ``,``, colon ``:``,
    semicolon ``;``, angle brackets ``<`` ``>`` and the question mark ``?``.

    :return: A compiled regex pattern to match Kotlin delimiters.
    :rtype: re.Pattern
    """
    delimiter_class = r'[()\[\]{}.,:;<>?]'
    return re.compile(delimiter_class)
48+
49+
@staticmethod
50+
def boolean_regex() -> re.Pattern:
    """
    Compile and return a regular expression pattern to identify Kotlin boolean literals.

    The pattern matches the whole words ``true``, ``false`` and ``null``;
    longer identifiers that merely contain one of them are not matched.

    :return: A compiled regex pattern to match Kotlin boolean literals and `null`.
    :rtype: re.Pattern
    """
    literal_words = r'\b(?:true|false|null)\b'
    return re.compile(literal_words)
60+
3761
@staticmethod
3862
def remove_comments(source_code: str, isList: bool = False) -> str:
63+
"""
64+
Remove comments from the provided Kotlin source code string.
65+
66+
:param str source_code: The Kotlin source code from which to remove comments.
67+
:return: The source code with all comments removed.
68+
:rtype: str
69+
"""
70+
return Kotlin.comment_regex().sub(lambda match: match.group('noncomment') if match.group('noncomment') else '', source_code).strip()
3971
result = []
4072
for match in Kotlin.comment_regex().finditer(source_code):
4173
if match.group('noncomment'):
@@ -45,5 +77,5 @@ def remove_comments(source_code: str, isList: bool = False) -> str:
4577
return ''.join(result)
4678

4779
@staticmethod
48-
def remove_keywords(source: str):
80+
def remove_keywords(source: str) -> str:
    """
    Remove every Kotlin keyword from the provided source string.

    :param str source: The Kotlin source code to strip keywords from.
    :return: The source with each keyword match replaced by an empty string.
    :rtype: str
    """
    keyword_pattern = Kotlin.keywords_regex()
    return keyword_pattern.sub('', source)

src/PyReprism/languages/rust.py

+42-10
Original file line numberDiff line numberDiff line change
@@ -2,48 +2,80 @@
22
from PyReprism.utils import extension
33

44

5-
class Dart:
5+
class Rust:
    """Regex helpers for recognising and stripping Rust source-code tokens."""

    def __init__(self):
        # `self` was missing, which made `Rust()` raise TypeError.
        pass

    @staticmethod
    def file_extension() -> str:
        """
        Return the file extension registered for Rust.

        :return: The value of ``extension.rust``.
        :rtype: str
        """
        return extension.rust

    @staticmethod
    def keywords() -> list:
        """
        Return the list of Rust keywords recognised by this class.

        :return: The keyword strings.
        :rtype: list
        """
        keyword = 'abstract|alignof|as|be|box|break|const|continue|crate|do|else|enum|extern|false|final|fn|for|if|impl|in|let|loop|match|mod|move|mut|offsetof|once|override|priv|pub|pure|ref|return|sizeof|static|self|struct|super|true|trait|type|typeof|unsafe|unsized|use|virtual|where|while|yield'.split('|')
        return keyword

    @staticmethod
    def comment_regex() -> re.Pattern:
        """
        Compile and return the pattern used to separate Rust comments from code.

        Each match fills exactly one of two named groups: ``comment`` or
        ``noncomment`` (quoted literals and any other run of text).

        NOTE(review): the ``///.*?$`` alternative can never be chosen because
        ``//.*?$`` precedes it and already matches the same text.
        NOTE(review): with DOTALL, ``^.*?\\*/`` can match from a line start
        through the first ``*/`` further on, including code in between --
        confirm this is intended for whole-file input.

        :return: A compiled regex with ``comment`` and ``noncomment`` groups.
        :rtype: re.Pattern
        """
        pattern = re.compile(r'(?P<comment>//.*?$|///.*?$|/\*[\s\S]*?\*/|/\*.*?$|^.*?\*/)|(?P<noncomment>\'(\\.|[^\\\'])*\'|"(\\.|[^\\"])*"|.[^/\'"]*)', re.DOTALL | re.MULTILINE)
        return pattern

    @staticmethod
    def number_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust numeric literals.

        Covers ``0x``, ``0o`` and ``0b`` prefixed integers and decimal numbers,
        each with optional ``_`` digit separators, plus an optional type suffix
        (``i``/``u`` with an optional width, ``f32`` or ``f64``).

        :return: A compiled regex pattern to match Rust numbers.
        :rtype: re.Pattern
        """
        pattern = re.compile(r'\b(?:0x[\dA-Fa-f](?:_?[\dA-Fa-f])*|0o[0-7](?:_?[0-7])*|0b[01](?:_?[01])*|(\d(?:_?\d)*)?\.?\d(?:_?\d)*(?:[Ee][+-]?\d+)?)(?:_?(?:[iu](?:8|16|32|64)?|f32|f64))?\b')
        return pattern

    @staticmethod
    def operator_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust operators.

        :return: A compiled regex pattern to match Rust operators.
        :rtype: re.Pattern
        """
        pattern = re.compile(r'[-+*\/%!^]=?|=[=>]?|@|&[&=]?|\|[|=]?|<<?=?|>>?=?')
        return pattern

    @staticmethod
    def keywords_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust keywords.

        :return: A compiled regex matching any entry of ``Rust.keywords()`` as a whole word.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(' + '|'.join(Rust.keywords()) + r')\b')

    @staticmethod
    def boolean_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust boolean literals.

        The pattern matches the whole words ``true`` and ``false`` and, in
        addition, the identifier ``None``.

        :return: A compiled regex pattern to match ``true``, ``false`` and ``None``.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(?:true|false|None)\b')

    @staticmethod
    def delimiters_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify Rust language delimiters.

        Each match is a single character from this set: parentheses ``()``,
        brackets ``[]``, braces ``{}``, period ``.``, comma ``,``, colon ``:``,
        semicolon ``;``, angle brackets ``<`` ``>`` and the question mark ``?``.

        :return: A compiled regex pattern to match Rust delimiters.
        :rtype: re.Pattern
        """
        return re.compile(r'[()\[\]{}.,:;<>?]')

    @staticmethod
    def rust_delimiters_regex() -> re.Pattern:
        """
        Backward-compatible alias for :meth:`delimiters_regex`.

        :return: A compiled regex pattern to match Rust delimiters.
        :rtype: re.Pattern
        """
        return Rust.delimiters_regex()

    @staticmethod
    def remove_comments(source_code: str, isList: bool = False) -> 'str | list':
        """
        Remove comments from the provided Rust source code string.

        :param str source_code: The Rust source code from which to remove comments.
        :param bool isList: When True, return the non-comment fragments as a
            list instead of a single string.
        :return: The comment-free source with surrounding whitespace stripped,
            or the list of non-comment fragments when ``isList`` is True.
        :rtype: str | list
        """
        pattern = Rust.comment_regex()
        if isList:
            # An unconditional early return used to make this branch
            # unreachable, so `isList=True` was silently ignored.
            return [
                match.group('noncomment')
                for match in pattern.finditer(source_code)
                if match.group('noncomment')
            ]
        return pattern.sub(lambda match: match.group('noncomment') or '', source_code).strip()

    @staticmethod
    def remove_keywords(source: str) -> str:
        """
        Remove every Rust keyword from the provided source string.

        :param str source: The Rust source code to strip keywords from.
        :return: The source with each keyword match replaced by an empty string.
        :rtype: str
        """
        # keywords_regex() already returns a compiled pattern; no need to recompile it.
        return Rust.keywords_regex().sub('', source)

0 commit comments

Comments
 (0)