import re

from PyReprism.utils import extension


class Sql:
    """
    Lexical helpers for SQL source text.

    Every member is a static method: the class only groups the keyword list,
    the token regexes and the stripping utilities built on top of them.
    """

    def __init__(self):
        pass

    @staticmethod
    def file_extension() -> str:
        """
        Return the file extension used for SQL files.

        :return: The value of ``extension.sql``.
        :rtype: str
        """
        return extension.sql

    @staticmethod
    def keywords() -> list:
        """
        Return a list of SQL keywords and built-in type/function names.

        Every entry is a plain upper-case word (letters and underscores only),
        so the entries can be joined into a regex alternation without further
        processing. A fresh list is built on every call, so callers may mutate
        the result freely.

        :return: A list of SQL keywords and built-in names, without duplicates.
        :rtype: list
        """
        # One whitespace-separated literal keeps the table readable and makes it
        # impossible for a stray regex metacharacter to end up inside an entry.
        keyword = '''
            ACTION ADD AFTER ALGORITHM ALL ALTER ANALYZE ANY APPLY AS ASC AUTHORIZATION
            AUTO_INCREMENT BACKUP BDB BEGIN BERKELEYDB BIGINT BINARY BIT BLOB BOOL BOOLEAN
            BREAK BROWSE BTREE BULK BY CALL CASCADE CASCADED CASE CHAIN CHAR CHARACTER
            CHARSET CHECK CHECKPOINT CLOSE CLUSTERED COALESCE COLLATE COLUMN COLUMNS
            COMMENT COMMIT COMMITTED COMPUTE CONNECT CONSISTENT CONSTRAINT CONTAINS
            CONTAINSTABLE CONTINUE CONVERT CREATE CROSS CURRENT_DATE CURRENT_TIME
            CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE DATA DATABASE DATABASES DATE
            DATETIME DAY DBCC DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFINER DELAYED
            DELETE DELIMITER DELIMITERS DENY DESC DESCRIBE DETERMINISTIC DISABLE DISCARD
            DISK DISTINCT DISTINCTROW DISTRIBUTED DO DOUBLE DROP DUMMY DUMP DUMPFILE
            DUPLICATE EACH ELSE ELSEIF ENABLE ENCLOSED END ENGINE ENUM ERRLVL ERRORS
            ESCAPE ESCAPED EXCEPT EXEC EXECUTE EXISTS EXIT EXPLAIN EXTENDED FETCH FIELDS
            FILE FILLFACTOR FIRST FIXED FLOAT FOLLOWING FOR FORCE FOREIGN FREETEXT
            FREETEXTTABLE FROM FULL FUNCTION GEOMETRY GEOMETRYCOLLECTION GLOBAL GOTO
            GRANT GROUP HANDLER HASH HAVING HOLDLOCK HOUR IDENTITY IDENTITYCOL
            IDENTITY_INSERT IF IGNORE IMPORT IN INDEX INFILE INNER INNODB INOUT INSERT INT
            INTEGER INTERSECT INTERVAL INTO INVOKER ISOLATION ITERATE JOIN KEY KEYS KILL
            LANGUAGE LAST LEAVE LEFT LEVEL LIMIT LINENO LINES LINESTRING LOAD LOCAL LOCK
            LONG LONGBLOB LONGTEXT LOOP MATCH MATCHED MEDIUMBLOB MEDIUMINT MEDIUMTEXT
            MERGE MIDDLEINT MINUTE MODE MODIFIES MODIFY MONTH MULTILINESTRING MULTIPOINT
            MULTIPOLYGON NATIONAL NATURAL NCHAR NEXT NO NONCLUSTERED NULLIF NUMERIC OF
            OFF OFFSET OFFSETS ON OPEN OPENDATASOURCE OPENQUERY OPENROWSET OPTIMIZE
            OPTION OPTIONALLY ORDER OUT OUTER OUTFILE OVER PARTIAL PARTITION PERCENT
            PIVOT PLAN POINT POLYGON PRECEDING PRECISION PREPARE PREV PRIMARY PRINT
            PRIVILEGES PROC PROCEDURE PUBLIC PURGE QUICK RAISERROR READ READS REAL
            RECONFIGURE REFERENCES RELEASE RENAME REPEAT REPEATABLE REPLACE REPLICATION
            REQUIRE RESIGNAL RESTORE RESTRICT RETURN RETURNS REVOKE RIGHT ROLLBACK ROLLUP
            ROUTINE ROW ROWCOUNT ROWGUIDCOL ROWS RTREE RULE SAVE SAVEPOINT SCHEMA SECOND
            SELECT SERIAL SERIALIZABLE SESSION SESSION_USER SET SETUSER SHARE SHOW
            SHUTDOWN SIMPLE SMALLINT SNAPSHOT SOME SONAME SQL START STARTING STATISTICS
            STATUS STRIPED SYSTEM_USER TABLE TABLES TABLESPACE TEMP TEMPORARY TEMPTABLE
            TERMINATED TEXT TEXTSIZE THEN TIME TIMESTAMP TINYBLOB TINYINT TINYTEXT TO TOP
            TRAN TRANSACTION TRANSACTIONS TRIGGER TRUNCATE TSEQUAL TYPE TYPES UNBOUNDED
            UNCOMMITTED UNDEFINED UNION UNIQUE UNLOCK UNPIVOT UNSIGNED UPDATE UPDATETEXT
            USAGE USE USER USING VALUE VALUES VARBINARY VARCHAR VARCHARACTER VARYING VIEW
            WAITFOR WARNINGS WHEN WHERE WHILE WITH WITHIN WORK WRITE WRITETEXT YEAR
        '''.split()
        return keyword

    @staticmethod
    def comment_regex() -> re.Pattern:
        """
        Compile and return a regular expression that separates SQL comments from everything else.

        The ``comment`` group matches ``--`` and ``//`` line comments (up to, but
        not including, the end of the line) and ``/* ... */`` block comments.
        The ``noncomment`` group matches, in order of preference: a complete
        single- or double-quoted string (a doubled quote inside it is treated as
        an escaped quote), a run of characters that can start neither a comment
        nor a string, or any single remaining character. Because of that last
        fallback every character of the input is consumed by one of the two
        groups.

        :return: A compiled regex pattern with the named groups ``comment`` and ``noncomment``.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            # Comment alternatives come first so that "--" and "/*" win over the
            # single-character fallback at the end of the noncomment group.
            r"(?P<comment>--.*?$|//.*?$|/\*[^*]*\*+(?:[^/*][^*]*\*+)*/)"
            # Quoted strings are matched as a unit so comment markers inside
            # them are left alone.
            r"|(?P<noncomment>'(?:''|[^'])*'|\"(?:\"\"|[^\"])*\"|[^-/'\"]+|.)",
            re.DOTALL | re.MULTILINE,
        )
        return pattern

    @staticmethod
    def number_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify numeric literals in SQL code.

        Matches, case-insensitively: binary literals (``0b101``), hexadecimal
        literals (``0xFF``), and decimal integers or floats with an optional
        exponent (``1e10``) and an optional ``d``/``f`` suffix.

        :return: A compiled regex pattern to match numeric literals.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            r'\b0b[01]+\b'
            r'|\b0x[\da-f]*\.?[\da-fp-]+\b'
            r'|(?:\b\d+\.?\d*|\B\.\d+)(?:e[+-]?\d+)?[df]?',
            re.IGNORECASE,
        )
        return pattern

    @staticmethod
    def operator_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify symbolic operators.

        Group 1 captures the character immediately before the operator (or the
        empty string at the very start of the input); the operator itself is
        the remainder of the match. An operator directly preceded by ``.`` is
        not matched.

        :return: A compiled regex pattern to match arithmetic, comparison, bitwise and assignment operators.
        :rtype: re.Pattern
        """
        pattern = re.compile(
            r'(^|[^.])(?:\+[+=]?|-[-=]?|!=?|<<?=?|>>?>?=?|==?|&[&=]?|\|[|=]?|\*=?|\/=?|%=?|\^=?|[?:~])'
        )
        return pattern

    @staticmethod
    def keywords_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL keywords.

        The pattern matches any entry of :meth:`keywords` as a whole word,
        regardless of letter case; the matched keyword is available as group 1.

        :return: A compiled regex pattern to match SQL keywords.
        :rtype: re.Pattern
        """
        # Escaping is a no-op for the current entries but keeps the pattern
        # valid should an entry with a regex metacharacter ever be added.
        alternatives = '|'.join(re.escape(word) for word in Sql.keywords())
        return re.compile(r'\b(' + alternatives + r')\b', re.IGNORECASE)

    @staticmethod
    def boolean_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL boolean literals.

        :return: A compiled regex pattern matching ``true`` / ``false`` as whole words, in any letter case.
        :rtype: re.Pattern
        """
        return re.compile(r'\b(?:true|false)\b', re.IGNORECASE)

    @staticmethod
    def delimiters_regex() -> re.Pattern:
        """
        Compile and return a regular expression pattern to identify SQL delimiters.

        :return: A compiled regex pattern matching one bracket, brace, parenthesis or
            one of the punctuation characters ``. , : ; @ < >``.
        :rtype: re.Pattern
        """
        return re.compile(r'[()\[\]{}.,:;@<>]')

    @staticmethod
    def remove_comments(source_code: str) -> str:
        """
        Remove comments from the provided SQL source code string.

        Line comments are removed up to the end of their line (the newline
        itself is kept); quoted strings are copied through unchanged. Leading
        and trailing whitespace of the result is stripped.

        :param str source_code: The SQL source code from which to remove comments.
        :return: The source code with all comments removed.
        :rtype: str
        """
        # A match carries either the comment or the noncomment group; keeping
        # only the latter drops every comment.
        kept = Sql.comment_regex().sub(lambda match: match.group('noncomment') or '', source_code)
        return kept.strip()

    @staticmethod
    def remove_keywords(source: str) -> str:
        """
        Remove all SQL keywords from the provided source code string.

        Keywords are matched as whole words in any letter case. Whitespace
        around a removed keyword is left in place; only the leading and
        trailing whitespace of the final result is stripped.

        :param str source: The source code string from which to remove SQL keywords.
        :return: The source code string with all SQL keywords removed.
        :rtype: str
        """
        return Sql.keywords_regex().sub('', source).strip()
0 commit comments