-
Notifications
You must be signed in to change notification settings - Fork 34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
search: improve with CompositeSuggestQueryParser #151
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,10 +26,10 @@ | |
SortParam, | ||
) | ||
from invenio_records_resources.services.records.queryparser import ( | ||
CompositeSuggestQueryParser, | ||
FieldValueMapper, | ||
QueryParser, | ||
SearchFieldTransformer, | ||
SuggestQueryParser, | ||
) | ||
from luqum.tree import Word | ||
|
||
|
@@ -68,9 +68,16 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): | |
# The user search needs to be highly restricted to avoid leaking | ||
# account information, hence do not edit here unless you are | ||
# absolutely sure what you're doing. | ||
suggest_parser_cls = SuggestQueryParser.factory( | ||
suggest_parser_cls = CompositeSuggestQueryParser.factory( | ||
tree_transformer_cls=SearchFieldTransformer, | ||
fields=["username^2", "email^2", "profile.full_name^3", "profile.affiliations"], | ||
fields=[ | ||
"username.keyword^2", | ||
"username^2", | ||
"email.keyword^2", | ||
ptamarit marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"email^2", | ||
"profile.full_name^3", | ||
"profile.affiliations", | ||
], | ||
# Only public emails because hidden emails are stored in email_hidden field. | ||
allow_list=["username", "email"], | ||
mapping={ | ||
|
@@ -81,7 +88,6 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): | |
"name": "profile.full_name", | ||
}, | ||
type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types | ||
fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Then the |
||
) | ||
|
||
params_interpreters_cls = [ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -132,7 +132,7 @@ def users_data(): | |
"""Data for users.""" | ||
return [ | ||
{ | ||
"username": "pubres", | ||
"username": "pub-res", | ||
"email": "[email protected]", | ||
"profile": { | ||
"full_name": "Tim Smith", | ||
|
@@ -322,7 +322,7 @@ def user_pub(users): | |
@pytest.fixture(scope="module") | ||
def user_pubres(users): | ||
"""User tjs (public/restricted).""" | ||
return users["pubres"] | ||
return users["pub-res"] | ||
|
||
|
||
@pytest.fixture(scope="module") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2022 CERN. | ||
# Copyright (C) 2022-2024 CERN. | ||
# | ||
# Invenio-Users-Resources is free software; you can redistribute it and/or | ||
# modify it under the terms of the MIT License; see LICENSE file for more | ||
|
@@ -98,24 +98,36 @@ | |
assert res["hits"]["total"] == 0 | ||
|
||
|
||
USERNAME_JOSE = ["pub"] | ||
USERNAME_TIM = ["pub-res"] | ||
USERNAME_BOTH = USERNAME_JOSE + USERNAME_TIM | ||
|
||
|
||
# | ||
# Read | ||
@pytest.mark.parametrize( | ||
"query", | ||
"query,expected_usernames", | ||
[ | ||
"CERN", | ||
"Jose CERN", | ||
"Jose AND CERN", | ||
"Tim", | ||
"Tim CERN", | ||
"Jose", | ||
"Jos", | ||
"[email protected]", | ||
"pub", | ||
("CERN", USERNAME_BOTH), | ||
("Jose", USERNAME_JOSE), | ||
("Jos", USERNAME_JOSE), | ||
("Jose CERN", USERNAME_JOSE), | ||
("Tim", USERNAME_TIM), | ||
("Tim CERN", USERNAME_TIM), | ||
("[email protected]", USERNAME_JOSE), | ||
("[email protected]", USERNAME_JOSE), | ||
("pub@inveniosoft", USERNAME_JOSE), | ||
("pub", USERNAME_BOTH), | ||
("pub-res", USERNAME_TIM), | ||
("re", USERNAME_TIM), | ||
("res", USERNAME_TIM), | ||
], | ||
) | ||
def test_user_search_field(user_service, user_pub, query): | ||
def test_user_search_field(user_service, user_pub, query, expected_usernames): | ||
"""Make sure certain fields ARE searchable.""" | ||
res = user_service.search(user_pub.identity, suggest=query).to_dict() | ||
assert res["hits"]["total"] > 0 | ||
usernames = [entry["username"] for entry in res["hits"]["hits"]] | ||
assert sorted(usernames) == expected_usernames | ||
Check failure on line 130 in tests/services/users/test_service_users.py
|
||
|
||
|
||
# | ||
|
@@ -127,7 +139,7 @@ | |
assert res["username"] == "pub" | ||
assert res["email"] == user_pub.email | ||
res = user_service.read(anon_identity, user_pubres.id).to_dict() | ||
assert res["username"] == "pubres" | ||
assert res["username"] == "pub-res" | ||
assert "email" not in res | ||
pytest.raises( | ||
# TODO: Should be mapped to a 404 | ||
|
@@ -139,7 +151,7 @@ | |
|
||
|
||
@pytest.mark.parametrize( | ||
"username,can_read", [("pub", True), ("pubres", True), ("res", False)] | ||
"username,can_read", [("pub", True), ("pub-res", True), ("res", False)] | ||
) | ||
def test_read_avatar_with_anon(user_service, anon_identity, users, username, can_read): | ||
"""Anonymous users can read avatar single *public* user.""" | ||
|
@@ -160,7 +172,7 @@ | |
"username", | ||
[ | ||
"pub", | ||
"pubres", | ||
"pub-res", | ||
"res", | ||
], | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Searching for a username with a dash is still not working well.
For instance, searching for "one-two" seems to search for usernames starting with "one" and usernames starting with "two", and therefore does not find anything.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is the issue: the text field is split on `-`:
https://github.com/inveniosoftware/invenio-users-resources/blob/master/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json#L132
If you search by `username.keyword`, it should work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Awesome, thanks a lot @kpsherva !
2 months later 😅, I confirm that this fixes the issue.
I modified the tests to include a username with a dash, and I added a test showing that usernames with dashes can now be searched properly.