Skip to content

Commit

Permalink
always expect exists filter (#480)
Browse files Browse the repository at this point in the history
* Make RegExFilterExpression KeyValueBased

* Add more unit tests for lucene filter

* Add Exists filter to Not expressions with key-value based child

* Fix black formatting

* Update changelog

* Fix black formatting

* Refactor adding exists filter in rule parser

* Refactor rule parser
  • Loading branch information
ppcad authored Nov 27, 2023
1 parent 426b244 commit 199200c
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- add lru caching for peudonymizatin of urls
* improve loading times for the rule tree by optimizing the rule segmentation and sorting
* add support for python 3.12 and remove support for python 3.9
* always check the existence of a field for negated key-value based lucene filter expressions

### Bugfix

Expand Down
7 changes: 2 additions & 5 deletions logprep/filter/expression/filter_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,19 +318,16 @@ def does_match(self, document: dict) -> bool:
return self._lower_bound <= value <= self._upper_bound


class RegExFilterExpression(KeyBasedFilterExpression):
class RegExFilterExpression(KeyValueBasedFilterExpression):
"""Filter expression that matches a value using regex."""

match_escaping_pattern = re.compile(r".*?(?P<escaping>\\*)\$$")
match_parts_pattern = re.compile(r"^(?P<flag>\(\?\w\))?(?P<start>\^)?(?P<pattern>.*)")

def __init__(self, key: List[str], regex: str):
super().__init__(key)
self._regex = self._normalize_regex(regex)
self._matcher = re.compile(self._regex)

def __repr__(self) -> str:
return f"{self.key_as_dotted_string}:/{self._regex.strip('^$')}/"
super().__init__(key, f"/{self._regex.strip('^$')}/")

@staticmethod
def _normalize_regex(regex: str) -> str:
Expand Down
43 changes: 33 additions & 10 deletions logprep/framework/rule_tree/rule_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
behavior, allowing a simpler construction of the rule tree.
"""
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional

from logprep.filter.expression.filter_expression import (
Always,
Exists,
Not,
KeyValueBasedFilterExpression,
FilterExpression,
)

from logprep.framework.rule_tree.demorgan_resolver import DeMorganResolver
Expand Down Expand Up @@ -116,6 +118,8 @@ def _add_exists_filter(parsed_rules: list):
checks if the given field even exists. Like this unnecessary comparisons can be prevented
when the tree would check each of the values the field can have even when the field does
not exist in the current event.
It also adds an exists filter to negated key value based expressions,
so that matching a value always requires the existence of the field.
Parameters
----------
Expand All @@ -125,16 +129,35 @@ def _add_exists_filter(parsed_rules: list):
"""
for parsed_rule in parsed_rules:
temp_parsed_rule = parsed_rule.copy()
skipped_counter = 0

for segment_index, segment in enumerate(temp_parsed_rule):
if isinstance(segment, (Exists, Not, Always)):
skipped_counter += 1
added_exists_filter_count = 0
for segment_idx, segment in enumerate(temp_parsed_rule):
if isinstance(segment, (Exists, Always)):
continue
exists_filter = RuleParser._get_exists_filter(segment)
if exists_filter is None:
continue

exists_filter = Exists(segment.key)
if exists_filter in parsed_rule:
skipped_counter += 1
continue
parsed_rule.insert(segment_idx + added_exists_filter_count, exists_filter)
added_exists_filter_count += 1

@staticmethod
def _get_exists_filter(segment: FilterExpression) -> Optional[Exists]:
"""Get Exists filter expression based in segment.
Parameters
----------
segment: FilterExpression
Filter expressions segment.
Returns
-------
Optional[Exists]
Returns an Exists FilterExpression from a KeyValueBased FilterExpression or None.
parsed_rule.insert(segment_index * 2 - skipped_counter, exists_filter)
"""
if isinstance(segment, Not):
segment = segment.children[0]
if isinstance(segment, KeyValueBasedFilterExpression):
return Exists(segment.key)
return None
10 changes: 10 additions & 0 deletions tests/unit/filter/test_lucene_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
And,
Null,
Always,
Exists,
Not,
)


Expand Down Expand Up @@ -214,6 +216,14 @@ def test_creates_filter_match_all(self):
lucene_filter = LuceneFilter.create("*")
assert lucene_filter == Always(True)

def test_creates_filter_exists(self):
lucene_filter = LuceneFilter.create("foo")
assert lucene_filter == Exists(["foo"])

def test_creates_filter_not_exists(self):
lucene_filter = LuceneFilter.create("NOT foo")
assert lucene_filter == Not(Exists(["foo"]))

escape_ends_of_expressions_test_cases = [
("Empty string", "", ""),
("No escaping", "foo bar baz", "foo bar baz"),
Expand Down
32 changes: 29 additions & 3 deletions tests/unit/framework/rule_tree/test_rule_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@
# pylint: disable=too-many-statements
import pytest

from logprep.filter.expression.filter_expression import StringFilterExpression, Not, Exists
from logprep.filter.expression.filter_expression import (
StringFilterExpression,
Not,
Exists,
RegExFilterExpression,
)
from logprep.framework.rule_tree.rule_parser import RuleParser
from logprep.processor.pre_detector.rule import PreDetectorRule


pytest.importorskip("logprep.processor.pre_detector")

string_filter_expression_1 = StringFilterExpression(["key1"], "value1")
Expand Down Expand Up @@ -163,6 +169,7 @@ class TestRuleParser:
[
Exists(["EventID"]),
StringFilterExpression(["EventID"], "17"),
Exists(["Image"]),
Not(StringFilterExpression(["Image"], "*\\powershell.exe")),
Exists(["PipeName"]),
StringFilterExpression(["PipeName"], "\\PSHost*"),
Expand Down Expand Up @@ -289,6 +296,7 @@ class TestRuleParser:
[
Exists(["bar"]),
StringFilterExpression(["bar"], "foo"),
Exists(["foo"]),
Not(StringFilterExpression(["foo"], "bar")),
]
],
Expand Down Expand Up @@ -332,10 +340,12 @@ class TestRuleParser:
{},
{},
[
[Not(StringFilterExpression(["foo"], "bar"))],
[Exists(["foo"]), Not(StringFilterExpression(["foo"], "bar"))],
[
Exists(["msg"]),
Not(StringFilterExpression(["msg"], "123")),
Not(StringFilterExpression(["msg"], "456")),
Exists(["test"]),
Not(StringFilterExpression(["test"], "ok")),
],
],
Expand Down Expand Up @@ -410,6 +420,7 @@ class TestRuleParser:
Not(Not(Exists(["AImphash"]))),
Exists(["EventID"]),
StringFilterExpression(["EventID"], "15"),
Exists(["Imphash"]),
Not(StringFilterExpression(["Imphash"], "000")),
]
],
Expand Down Expand Up @@ -639,7 +650,22 @@ def test_parse_rule_param(self, rule, priority_dict, tag_map, expected_expressio
[Exists(["key3"]), string_filter_expression_3],
],
),
([[Not(string_filter_expression_1)]], [[Not(string_filter_expression_1)]]),
(
[[Not(string_filter_expression_1)]],
[[Exists(["key1"]), Not(string_filter_expression_1)]],
),
(
[[Not(Exists(["foo"]))]],
[[Not(Exists(["foo"]))]],
),
(
[[RegExFilterExpression(["foo"], "bar")]],
[[Exists(["foo"]), RegExFilterExpression(["foo"], "bar")]],
),
(
[[Not(RegExFilterExpression(["foo"], "bar"))]],
[[Exists(["foo"]), Not(RegExFilterExpression(["foo"], "bar"))]],
),
(
[[string_filter_expression_1, Exists(["key1"])], [string_filter_expression_1]],
[
Expand Down

0 comments on commit 199200c

Please sign in to comment.