Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .matches(node: BaseNode) method to MetadataFilter, MetadataFilters #17584

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 59 additions & 22 deletions llama-index-core/llama_index/core/vector_stores/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ class FilterOperator(str, Enum):
ANY = "any" # Contains any (array of strings)
ALL = "all" # Contains all (array of strings)
TEXT_MATCH = "text_match" # full text match (allows you to search for a specific substring, token or phrase within the text field)
TEXT_MATCH_INSENSITIVE = (
"text_match_insensitive" # full text match (case insensitive)
)
TEXT_MATCH_INSENSITIVE = "text_match_insensitive" # full text match (case insensitive)
CONTAINS = "contains" # metadata array contains value (string or number)
IS_EMPTY = "is_empty" # the field is not exist or empty (null or empty array)

Expand Down Expand Up @@ -126,6 +124,45 @@ def from_dict(
"""
return MetadataFilter.model_validate(filter_dict)

def matches(self, node: BaseNode) -> bool:
    """Return whether ``node``'s metadata satisfies this filter.

    The metadata value stored under ``self.key`` is compared against
    ``self.value`` according to ``self.operator``.

    Args:
        node: Node whose ``metadata`` mapping is inspected.

    Returns:
        True when the node passes the filter, False otherwise. A missing
        (or ``None``) metadata value never satisfies an ordering comparison
        (GT/LT/GTE/LTE) or CONTAINS, and always satisfies IS_EMPTY.

    Raises:
        NotImplementedError: For ANY, ALL, TEXT_MATCH and
            TEXT_MATCH_INSENSITIVE, which are not supported yet.
        ValueError: If the operator is not a known ``FilterOperator``.
    """
    operator = self.operator

    # Reject unsupported operators before touching the node at all.
    if operator == FilterOperator.ANY:
        raise NotImplementedError("ANY operator not implemented yet")
    if operator == FilterOperator.ALL:
        raise NotImplementedError("ALL operator not implemented yet")
    if operator == FilterOperator.TEXT_MATCH:
        raise NotImplementedError("TEXT_MATCH operator not implemented yet")
    if operator == FilterOperator.TEXT_MATCH_INSENSITIVE:
        raise NotImplementedError("TEXT_MATCH_INSENSITIVE operator not implemented yet")

    expected = self.value
    # ``.get`` yields None for a missing key, so "missing" and "explicitly
    # None" are treated the same way by every branch below.
    actual = node.metadata.get(self.key)

    if operator == FilterOperator.EQ:
        return actual == expected
    if operator == FilterOperator.NE:
        return actual != expected
    if operator == FilterOperator.IN:
        return actual in expected
    if operator == FilterOperator.NIN:
        return actual not in expected

    if operator in (
        FilterOperator.GT,
        FilterOperator.LT,
        FilterOperator.GTE,
        FilterOperator.LTE,
    ):
        # Ordering None against a value raises TypeError; a node without
        # the key simply does not match.
        if actual is None:
            return False
        if operator == FilterOperator.GT:
            return actual > expected
        if operator == FilterOperator.LT:
            return actual < expected
        if operator == FilterOperator.GTE:
            return actual >= expected
        return actual <= expected

    if operator == FilterOperator.CONTAINS:
        # CONTAINS means "the metadata array contains the filter value",
        # i.e. the reverse direction of IN.
        if actual is None:
            return False
        return expected in actual

    if operator == FilterOperator.IS_EMPTY:
        if actual is None:
            return True
        if hasattr(actual, "__len__"):
            return len(actual) == 0
        return False

    raise ValueError(f"Unknown filter operator: {self.operator}")


# # TODO: Deprecate ExactMatchFilter and use MetadataFilter instead
# # Keep class for now so that AutoRetriever can still work with old vector stores
Expand Down Expand Up @@ -175,27 +212,35 @@ def from_dicts(

"""
return cls(
filters=[
MetadataFilter.from_dict(filter_dict) for filter_dict in filter_dicts
],
filters=[MetadataFilter.from_dict(filter_dict) for filter_dict in filter_dicts],
condition=condition,
)

def legacy_filters(self) -> List[ExactMatchFilter]:
    """Translate these filters into legacy ``ExactMatchFilter`` objects.

    Only flat equality filters can be expressed in the legacy form; a nested
    ``MetadataFilters`` entry or any operator other than ``FilterOperator.EQ``
    is rejected.

    Returns:
        One ``ExactMatchFilter`` per entry, in the original order.

    Raises:
        ValueError: If any entry is nested or is not an equality filter.
    """
    converted = []
    for entry in self.filters:
        # The isinstance check comes first so ``operator`` is never read on a
        # nested MetadataFilters entry.
        is_plain_equality = (
            not isinstance(entry, MetadataFilters)
            and entry.operator == FilterOperator.EQ
        )
        if not is_plain_equality:
            raise ValueError(
                "Vector Store only supports exact match filters. "
                "Please use ExactMatchFilter or FilterOperator.EQ instead."
            )
        converted.append(ExactMatchFilter(key=entry.key, value=entry.value))
    return converted

def matches(self, node: BaseNode) -> bool:
    """Return whether ``node`` satisfies this group of filters.

    Each entry in ``self.filters`` is asked whether it matches ``node`` and
    the answers are combined according to ``self.condition``:

    * AND: every sub-filter must match (an empty list matches).
    * OR: at least one sub-filter must match (an empty list does not match).
    * NOT: the single sub-filter must not match.

    AND / OR stop at the first deciding sub-filter, so later sub-filters are
    not evaluated once the outcome is known.

    Args:
        node: Node to test.

    Returns:
        True when the node passes the combined condition.

    Raises:
        ValueError: If the condition is NOT and there is not exactly one
            sub-filter, or if the condition is unknown.
    """
    # NOTE(review): assumes every entry in ``self.filters`` exposes
    # ``matches(node)`` - confirm for legacy ExactMatchFilter entries.
    if self.condition == FilterCondition.AND:
        return all(sub_filter.matches(node) for sub_filter in self.filters)
    if self.condition == FilterCondition.OR:
        return any(sub_filter.matches(node) for sub_filter in self.filters)
    if self.condition == FilterCondition.NOT:
        # Raise explicitly: an ``assert`` is stripped when Python runs with
        # -O, which would silently skip this validation.
        if len(self.filters) != 1:
            raise ValueError("NOT condition must have exactly one sub-filter")
        return not self.filters[0].matches(node)
    raise ValueError(f"Unknown filter condition: {self.condition}")


class VectorStoreQuerySpec(BaseModel):
"""Schema for a structured request for vector store
Expand Down Expand Up @@ -308,19 +353,15 @@ def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResul
"""Query vector store."""
...

async def aquery(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Query the vector store from async code.

    NOTE: not every vector store provides a real async implementation. This
    fallback simply delegates to the synchronous ``query`` method.
    """
    result = self.query(query, **kwargs)
    return result

def persist(self, persist_path: str, fs: Optional[fsspec.AbstractFileSystem] = None) -> None:
    """No-op persist: both arguments are ignored and nothing is written."""
    return


Expand Down Expand Up @@ -416,17 +457,13 @@ async def aclear(self) -> None:
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Query vector store.

    Args:
        query: The query to run against the store.
        **kwargs: Extra keyword arguments accepted alongside the query.

    Returns:
        The query result.
    """

async def aquery(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """
    Query the vector store from async code.

    NOTE: not every vector store provides a real async implementation. This
    fallback simply delegates to the synchronous ``query`` method.
    """
    result = self.query(query, **kwargs)
    return result

def persist(self, persist_path: str, fs: Optional[fsspec.AbstractFileSystem] = None) -> None:
    """No-op persist: both arguments are ignored and nothing is written."""
    return
Loading