Skip to content

Commit c4dc4b4

Browse files
committed
(feat): add sql
1 parent e9079fa commit c4dc4b4

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

src/PyReprism/languages/sql.py

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
import re
2+
from PyReprism.utils import extension
3+
4+
5+
class Sql:
    """
    Regex-based helpers for tokenising and cleaning SQL source text.

    All helpers are static; the class only acts as a namespace.
    """

    # Kept as whitespace-separated words so the list stays readable and
    # cannot pick up regex metacharacters. Every entry is a plain identifier.
    _KEYWORDS = tuple((
        'ACTION ADD AFTER ALGORITHM ALL ALTER ANALYZE ANY APPLY AS ASC '
        'AUTHORIZATION AUTO_INCREMENT '
        'BACKUP BDB BEGIN BERKELEYDB BIGINT BINARY BIT BLOB BOOL BOOLEAN '
        'BREAK BROWSE BTREE BULK BY '
        'CALL CASCADE CASCADED CASE CHAIN CHAR CHARACTER CHARSET CHECK '
        'CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COLUMNS COMMENT '
        'COMMIT COMMITTED COMPUTE CONNECT CONSISTENT CONSTRAINT CONTAINS '
        'CONTAINSTABLE CONTINUE CONVERT CREATE CROSS CURRENT CURRENT_DATE '
        'CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE '
        'DATA DATABASE DATABASES DATE DATETIME DAY DBCC DEALLOCATE DEC '
        'DECIMAL DECLARE DEFAULT DEFINER DELAYED DELETE DELIMITER DELIMITERS '
        'DENY DESC DESCRIBE DETERMINISTIC DISABLE DISCARD DISK DISTINCT '
        'DISTINCTROW DISTRIBUTED DO DOUBLE DROP DUMMY DUMP DUMPFILE DUPLICATE '
        'ELSE ELSEIF ENABLE ENCLOSED END ENGINE ENUM ERRLVL ERRORS ESCAPED '
        'EXCEPT EXEC EXECUTE EXISTS EXIT EXPLAIN EXTENDED '
        'FETCH FIELDS FILE FILLFACTOR FIRST FIXED FLOAT FOLLOWING FOR FORCE '
        'FOREIGN FREETEXT FREETEXTTABLE FROM FULL FUNCTION '
        'GEOMETRY GEOMETRYCOLLECTION GLOBAL GOTO GRANT GROUP '
        'HANDLER HASH HAVING HOLDLOCK HOUR '
        'IDENTITY IDENTITYCOL IDENTITY_INSERT IF IGNORE IMPORT INDEX INFILE '
        'INNER INNODB INOUT INSERT INT INTEGER INTERSECT INTERVAL INTO '
        'INVOKER ISOLATION ITERATE '
        'JOIN '
        'KEY KEYS KILL '
        'LANGUAGE LAST LEAVE LEFT LEVEL LIMIT LINENO LINES LINESTRING LOAD '
        'LOCAL LOCK LONG LONGBLOB LONGTEXT LOOP '
        'MATCH MATCHED MEDIUMBLOB MEDIUMINT MEDIUMTEXT MERGE MIDDLEINT MINUTE '
        'MODE MODIFIES MODIFY MONTH MULTILINESTRING MULTIPOINT MULTIPOLYGON '
        'NATIONAL NATURAL NCHAR NEXT NO NONCLUSTERED NULLIF NUMERIC '
        'OF OFF OFFSET OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET '
        'OPTIMIZE OPTION OPTIONALLY ORDER OUT OUTER OUTFILE OVER '
        'PARTIAL PARTITION PERCENT PIVOT PLAN POINT POLYGON PRECEDING '
        'PRECISION PREPARE PREV PRIMARY PRINT PRIVILEGES PROC PROCEDURE '
        'PUBLIC PURGE '
        'QUICK '
        'RAISERROR READ READS REAL RECONFIGURE REFERENCES RELEASE RENAME '
        'REPEAT REPEATABLE REPLACE REPLICATION REQUIRE RESIGNAL RESTORE '
        'RESTRICT RETURN RETURNS REVOKE RIGHT ROLLBACK ROLLUP ROUTINE ROW '
        'ROWCOUNT ROWGUIDCOL ROWS RTREE RULE '
        'SAVE SAVEPOINT SCHEMA SECOND SELECT SERIAL SERIALIZABLE SESSION '
        'SESSION_USER SET SETUSER SHARE SHOW SHUTDOWN SIMPLE SMALLINT '
        'SNAPSHOT SOME SONAME SQL START STARTING STATISTICS STATUS STRIPED '
        'SYSTEM_USER '
        'TABLE TABLES TABLESPACE TEMP TEMPORARY TEMPTABLE TERMINATED TEXT '
        'TEXTSIZE THEN TIME TIMESTAMP TINYBLOB TINYINT TINYTEXT TO TOP TRAN '
        'TRANSACTION TRANSACTIONS TRIGGER TRUNCATE TSEQUAL TYPE TYPES '
        'UNBOUNDED UNCOMMITTED UNDEFINED UNION UNIQUE UNLOCK UNPIVOT UNSIGNED '
        'UPDATE UPDATETEXT USAGE USE USER USING '
        'VALUE VALUES VARBINARY VARCHAR VARCHARACTER VARYING VIEW '
        'WAITFOR WARNINGS WHEN WHERE WHILE WITH WITHIN WORK WRITE WRITETEXT '
        'YEAR'
    ).split())

    def __init__(self):
        pass

    @staticmethod
    def file_extension() -> str:
        """
        Return the file extension used for SQL files.

        :return: The file extension for SQL files.
        :rtype: str
        """
        return extension.sql

    @staticmethod
    def keywords() -> list:
        """
        Return a list of SQL keywords and built-in functions.

        Every entry is an upper-case identifier (letters and underscores
        only). A new list is returned on each call, so callers may mutate it.

        :return: A list of SQL keywords and built-in function names.
        :rtype: list
        """
        return list(Sql._KEYWORDS)

    @staticmethod
    def comment_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify comments and non-comment code in SQL source files.

        The ``comment`` group matches ``--`` line comments (up to, but not
        including, the newline) and ``/* ... */`` block comments. The
        ``noncomment`` group matches everything else; quoted strings are
        consumed whole so comment markers inside a literal are left alone.
        Inside a string both doubled quotes and backslash escapes are
        accepted.

        :return: A compiled regex pattern with the named groups ``comment`` and ``noncomment``.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            r'(?P<comment>--[^\n]*|/\*.*?\*/)'
            r'|(?P<noncomment>'
            r"'(?:''|\\.|[^'\\])*'"        # single-quoted string literal
            r'|"(?:""|\\.|[^"\\])*"'       # double-quoted identifier/string
            r"|.[^-/'\"]*"                 # any other run of code
            r')',
            re.DOTALL,
        )
        return pattern

    @staticmethod
    def number_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify numeric literals in SQL code.

        :return: A compiled, case-insensitive regex pattern to match binary (``0b101``) and hexadecimal (``0xFF``) literals, integers, decimals and numbers with an exponent.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            r'\b0b[01]+\b'
            r'|\b0x[\da-f]+\b'
            r'|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?',
            re.IGNORECASE,
        )
        return pattern

    @staticmethod
    def operator_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL operators.

        Multi-character operators are listed before the single-character
        class so that ``<>`` or ``>=`` is reported as one token.

        :return: A compiled, case-insensitive regex pattern to match SQL symbolic operators and logical keywords such as ``AND``, ``OR`` and ``LIKE``.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            r'<=>|<>|!=|<=|>=|<<|>>|\|\||&&|:='
            r'|[-+*/%=<>&|^~!]'
            r'|\b(?:AND|BETWEEN|DIV|ILIKE|IN|IS|LIKE|NOT|OR|REGEXP|RLIKE|XOR)\b',
            re.IGNORECASE,
        )
        return pattern

    @staticmethod
    def keywords_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL keywords.

        Keywords are escaped before being joined, and matching is
        case-insensitive because SQL keywords are. The keyword is captured
        in group 1.

        :return: A compiled regex pattern to match SQL keywords as whole words.
        :rtype: re.Pattern
        """
        alternatives = '|'.join(re.escape(word) for word in Sql.keywords())
        return re.compile(r'\b(' + alternatives + r')\b', re.IGNORECASE)

    @staticmethod
    def boolean_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL boolean literals.

        :return: A compiled, case-insensitive regex pattern to match ``TRUE`` and ``FALSE`` as whole words.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(?:true|false)\b', re.IGNORECASE)

    @staticmethod
    def delimiters_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL delimiters.

        :return: A compiled regex pattern to match a single delimiter character.
        :rtype: re.Pattern
        """
        return re.compile(r'[()\[\]{}.,:;@<>]')

    @staticmethod
    def remove_comments(source_code: str) -> str:
        """
        Remove comments from the provided SQL source code string.

        :param str source_code: The SQL source code from which to remove comments.
        :return: The source code with all comments removed and surrounding whitespace stripped.
        :rtype: str
        """
        # A comment match leaves the ``noncomment`` group as None, so it is
        # replaced by the empty string; code is copied through unchanged.
        return Sql.comment_regex().sub(
            lambda match: match.group('noncomment') or '', source_code
        ).strip()

    @staticmethod
    def remove_keywords(source: str) -> str:
        """
        Remove all SQL keywords from the provided source code string.

        :param str source: The source code string from which to remove SQL keywords.
        :return: The source code string with all SQL keywords removed and surrounding whitespace stripped.
        :rtype: str
        """
        return Sql.keywords_regex().sub('', source).strip()

src/PyReprism/utils/extension.py

+1
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@
128128
smalltalk: str = ".st"
129129
smarty: str = ".tpl"
130130
soy: str = ".soy"
131+
sql: str = ".sql"
131132
stylus: str = ".styl"
132133
swift: str = ".swift"
133134
tcl: str = ".tcl"

0 commit comments

Comments
 (0)