Skip to content

Commit

Permalink
Fix/query complexity (#15)
Browse files Browse the repository at this point in the history
* Fix operators calculation

* Fix spaghetti query thresholds

* Fix testcase

* Linting

* Fix failing test case
  • Loading branch information
leonardomathon authored May 17, 2022
1 parent c1fb479 commit d1ec895
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 25 deletions.
12 changes: 6 additions & 6 deletions sqleyes/detector/antipatterns/spaghetti_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,22 @@ def __init__(self, query):
super().__init__(query)

def check(self):
LOW_THRESHOLD = 60
MEDIUM_THRESHOLD = 75
HIGH_THRESHOLD = 90
LOW_THRESHOLD = 2.5
MEDIUM_THRESHOLD = 4
HIGH_THRESHOLD = 5.5

query_complexity = get_query_complexity(self.query)

if query_complexity < LOW_THRESHOLD:
return None

if LOW_THRESHOLD < query_complexity < MEDIUM_THRESHOLD:
if LOW_THRESHOLD <= query_complexity < MEDIUM_THRESHOLD:
certainty = "low"

if MEDIUM_THRESHOLD < query_complexity < HIGH_THRESHOLD:
if MEDIUM_THRESHOLD <= query_complexity < HIGH_THRESHOLD:
certainty = "medium"

if HIGH_THRESHOLD < query_complexity:
if HIGH_THRESHOLD <= query_complexity:
certainty = "high"

return DetectorOutput(query=self.query,
Expand Down
2 changes: 1 addition & 1 deletion sqleyes/utils/code_complexity_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def halstead_metrics(n1: int, n2: int, N1: int, N2: int):
V = N * math.log2(n)

# Difficulty
D = n1/2 * N2/n2
D = (n1/2) * (N2/n2)

# Effort
E = D * V
Expand Down
77 changes: 60 additions & 17 deletions sqleyes/utils/query_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,15 @@
from sqleyes.utils.query_keywords import SQL_FUNCTIONS


OPERATORS = ["+", "-", "*", "**", "/", "%", "&", "|", "||", "^", "=", ">", "<",
">=", "<=", "!<", "!>", "<>", "+=", "-=", "/=", "/=", "%=", "&=",
"^-=", "|*=", "ALL", "AND", "&&", "ANY", "BETWEEN", "EXISTS",
"IN", "LIKE", "NOT", "OR", "SOME", "IS NULL", "IS NOT NULL",
"UNIQUE"]
OPERATORS = ["+", "-", "*", "**", "/", "%", "&", "|", "||", "^", "=", "!=",
">", "<", ">=", "<=", "!<", "!>", "<>", "+=", "-=", "/=", "/=",
"%=", "&=", "^-=", "|*=", "ALL", "AND", "&&", "ANY", "BETWEEN",
"EXISTS", "IN", "LIKE", "NOT", "OR", "SOME", "IS NULL",
"IS NOT NULL", "UNIQUE"]

EXPRESSIONS = ["CASE", "DECODE", "IF", "NULLIF", "COALESCE", "GREATEST",
"GREATER", "LEAST", "LESSER", "CAST"]
"GREATER", "LEAST", "LESSER", "CAST", "JOIN", "GROUP BY",
"WHERE", "HAVING", "ORDER BY", "UNION", "EXCEPT"]


def get_subqueries(parsed_query: sqlparse.sql.Statement) -> Tuple[str, List[str]]:
Expand Down Expand Up @@ -290,16 +291,56 @@ def get_query_ops_and_expr(query: str) -> List[str]:
"""
result = []

for operator in OPERATORS:
count = query.count(operator)
if count != 0:
result.extend([operator] * count)

# Get all expressions used in the query
for expression in EXPRESSIONS:
count = query.count(expression)
if count != 0:
result.extend([expression] * count)
# Format the query
query = sqlparse.format(query, keyword_case="upper")

# Split the query
query_tokens = query.split()

# Fix split
# Merges ["SELECT", "*"] into ["SELECT *"]
# Merges ["IS", "NULL"] into ["IS NULL"]
# Merges ["ORDER", "BY"] into ["ORDER BY"]
# Merges ["GROUP", "BY"] into ["GROUP BY"]
i, query_len = 0, len(query_tokens)
while i < query_len - 1:
merge_current_cel = False
if "SELECT" in query_tokens[i] and query_tokens[i + 1] == "*":
merge_current_cel = True
elif query_tokens[i] == "IS" and query_tokens[i + 1] == "NULL":
merge_current_cel = True
elif query_tokens[i] == "ORDER" and query_tokens[i + 1] == "BY":
merge_current_cel = True
elif query_tokens[i] == "GROUP" and query_tokens[i + 1] == "BY":
merge_current_cel = True

# Merge the two cells into one
# Adjust the tracked query length, since the list now has one fewer cell
if merge_current_cel:
query_tokens[i:i + 2] = [' '.join(query_tokens[i:i + 2])]
query_len -= 1

i += 1

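# Fix split
# Merges ["IS", "NOT", "NULL"] into ["IS NOT NULL"]
# NOTE(review): the loop bound below (i < query_len - 1) still lets
# query_tokens[i + 2] index past the end of the list when the tokens end
# in ["IS", "NOT"]; a bound of query_len - 2 would avoid the IndexError.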
i, query_len = 0, len(query_tokens)
while i < query_len - 1:
if (query_tokens[i] == "IS" and query_tokens[i + 1] == "NOT" and
query_tokens[i + 2] == "NULL"):
query_tokens[i:i + 3] = [' '.join(query_tokens[i: i + 3])]
query_len -= 2
i += 1

# Check whether each element is an operator or an expression
for elem in query_tokens:
for operator in OPERATORS:
if elem == operator:
result.append(operator)

for expression in EXPRESSIONS:
if elem == expression:
result.append(expression)

return result

Expand Down Expand Up @@ -332,7 +373,9 @@ def get_query_complexity(query: str) -> float:
N1, N2 = len(operators), len(operands)
n1, n2 = len(set(operators)), len(set(operands))

return float(halstead_metrics(n1, n2, N1, N2)[4])
complexity = float(halstead_metrics(n1, n2, N1, N2)[3])

return complexity


def check_single_value_rule(columns: List[str]) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_main_fear_of_the_unknown(test_input, expected):
FROM product
WHERE price > 9.99
GROUP BY pSupplier
HAVING count(pId) > 10""",
""",
[[DEFINITIONS["anti_patterns"]["ambiguous_groups"]["title"],
DEFINITIONS["anti_patterns"]["ambiguous_groups"]["type"]]]
),
Expand Down

0 comments on commit d1ec895

Please sign in to comment.