Skip to content

Commit d2e5a0f

Browse files
committed
feat: apply to_tsvector() explicitly to the full-text search filtered column, only if it's not of tsvector type
1 parent e7cc8fe commit d2e5a0f

File tree

12 files changed

+422
-126
lines changed

12 files changed

+422
-126
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
1414
- #3727, Log maximum pool size - @steve-chavez
1515
- #1536, Add string comparison feature for jwt-role-claim-key - @taimoorzaeem
1616
- #3747, Allow `not_null` value for the `is` operator - @taimoorzaeem
17+
- #2255, Apply `to_tsvector()` explicitly to the full-text search filtered column (excluding `tsvector` types) - @laurenceisla
1718

1819
### Fixed
1920

docs/references/api/tables_views.rst

+15-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,21 @@ The :code:`fts` filter mentioned above has a number of options to support flexib
193193
194194
curl "http://localhost:3000/tsearch?my_tsv=not.wfts(french).amusant"
195195
196-
Using `websearch_to_tsquery` requires PostgreSQL of version at least 11.0 and will raise an error in earlier versions of the database.
196+
.. _fts_to_tsvector:
197+
198+
Automatic ``tsvector`` conversion
199+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
200+
201+
If the filtered column is not of type ``tsvector``, then it will be automatically converted using `to_tsvector() <https://www.postgresql.org/docs/current/functions-textsearch.html#TEXTSEARCH-FUNCTIONS-TABLE>`_.
202+
This allows using ``fts`` on ``text`` and ``json`` types out of the box, for example.
203+
204+
.. code-block:: bash
205+
206+
curl "http://localhost:3000/tsearch?my_text_column=fts(french).amusant"
207+
208+
.. code-block:: bash
209+
210+
curl "http://localhost:3000/tsearch?my_json_column=not.phfts(english).The%20Fat%20Cats"
197211
198212
.. _v_filter:
199213

src/PostgREST/ApiRequest/Types.hs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ module PostgREST.ApiRequest.Types
1414
, JsonOperand(..)
1515
, JsonOperation(..)
1616
, JsonPath
17+
, Language
1718
, ListVal
1819
, LogicOperator(..)
1920
, LogicTree(..)

src/PostgREST/Plan.hs

+27-21
Original file line numberDiff line numberDiff line change
@@ -265,27 +265,29 @@ data ResolverContext = ResolverContext
265265
, outputType :: Text -- ^ The output type for the response payload; e.g. "csv", "json", "binary".
266266
}
267267

268-
resolveColumnField :: Column -> CoercibleField
269-
resolveColumnField col = CoercibleField (colName col) mempty False (colNominalType col) Nothing (colDefault col) False
268+
resolveColumnField :: Column -> Maybe ToTsVector -> CoercibleField
269+
resolveColumnField col toTsV = CoercibleField (colName col) mempty False toTsV (colNominalType col) Nothing (colDefault col) False
270270

271-
resolveTableFieldName :: Table -> FieldName -> CoercibleField
272-
resolveTableFieldName table fieldName =
271+
resolveTableFieldName :: Table -> FieldName -> Maybe ToTsVector -> CoercibleField
272+
resolveTableFieldName table fieldName toTsV=
273273
fromMaybe (unknownField fieldName []) $ HMI.lookup fieldName (tableColumns table) >>=
274-
Just . resolveColumnField
274+
Just . flip resolveColumnField toTsV
275275

276276
-- | Resolve a type within the context based on the given field name and JSON path. Although there are situations where failure to resolve a field is considered an error (see `resolveOrError`), there are also situations where we allow it (RPC calls). If it should be an error and `resolveOrError` doesn't fit, ensure to check the `cfIRType` isn't empty.
277-
resolveTypeOrUnknown :: ResolverContext -> Field -> CoercibleField
278-
resolveTypeOrUnknown ResolverContext{..} (fn, jp) =
277+
resolveTypeOrUnknown :: ResolverContext -> Field -> Maybe ToTsVector -> CoercibleField
278+
resolveTypeOrUnknown ResolverContext{..} (fn, jp) toTsV =
279279
case res of
280280
-- types that are already json/jsonb don't need to be converted with `to_jsonb` for using arrow operators `data->attr`
281281
-- this prevents indexes not applying https://github.com/PostgREST/postgrest/issues/2594
282-
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
283-
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
282+
cf@CoercibleField{cfIRType="json"} -> cf{cfJsonPath=jp, cfToJson=False}
283+
cf@CoercibleField{cfIRType="jsonb"} -> cf{cfJsonPath=jp, cfToJson=False}
284+
-- Do not apply to_tsvector to tsvector types
285+
cf@CoercibleField{cfIRType="tsvector"} -> cf{cfJsonPath=jp, cfToJson=True, cfToTsVector=Nothing}
284286
-- other types will get converted `to_jsonb(col)->attr`, even unknown types
285-
cf -> cf{cfJsonPath=jp, cfToJson=True}
287+
cf -> cf{cfJsonPath=jp, cfToJson=True}
286288
where
287289
res = fromMaybe (unknownField fn jp) $ HM.lookup qi tables >>=
288-
Just . flip resolveTableFieldName fn
290+
Just . (\t -> resolveTableFieldName t fn toTsV)
289291

290292
-- | Install any pre-defined data representation from source to target to coerce this reference.
291293
--
@@ -311,11 +313,15 @@ withJsonParse ctx field@CoercibleField{cfIRType} = withTransformer ctx "json" cf
311313

312314
-- | Map the intermediate representation type to the output type defined by the resolver context (normally json), if available.
313315
resolveOutputField :: ResolverContext -> Field -> CoercibleField
314-
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field
316+
resolveOutputField ctx field = withOutputFormat ctx $ resolveTypeOrUnknown ctx field Nothing
315317

316318
-- | Map the query string format of a value (text) into the intermediate representation type, if available.
317-
resolveQueryInputField :: ResolverContext -> Field -> CoercibleField
318-
resolveQueryInputField ctx field = withTextParse ctx $ resolveTypeOrUnknown ctx field
319+
resolveQueryInputField :: ResolverContext -> Field -> OpExpr -> CoercibleField
320+
resolveQueryInputField ctx field opExpr = withTextParse ctx $ resolveTypeOrUnknown ctx field toTsVector
321+
where
322+
toTsVector = case opExpr of
323+
OpExpr _ (Fts _ lang _) -> Just $ ToTsVector lang
324+
_ -> Nothing
319325

320326
-- | Builds the ReadPlan tree on a number of stages.
321327
-- | Adds filters, order, limits on its respective nodes.
@@ -452,7 +458,7 @@ expandStarsForTable ctx@ResolverContext{representations, outputType} hasAgg rp@R
452458

453459
expandStarSelectField :: Bool -> [Column] -> CoercibleSelectField -> [CoercibleSelectField]
454460
expandStarSelectField _ columns sel@CoercibleSelectField{csField=CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Nothing} =
455-
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col }) columns
461+
map (\col -> sel { csField = withOutputFormat ctx $ resolveColumnField col Nothing }) columns
456462
expandStarSelectField True _ sel@CoercibleSelectField{csField=fld@CoercibleField{cfName="*", cfJsonPath=[]}, csAggFunction=Just Count} =
457463
[sel { csField = fld { cfFullRow = True } }]
458464
expandStarSelectField _ _ selectField = [selectField]
@@ -766,7 +772,7 @@ addOrders ctx ApiRequest{..} rReq =
766772

767773
resolveOrder :: ResolverContext -> OrderTerm -> CoercibleOrderTerm
768774
resolveOrder _ (OrderRelationTerm a b c d) = CoercibleOrderRelationTerm a b c d
769-
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld) dir nulls
775+
resolveOrder ctx (OrderTerm fld dir nulls) = CoercibleOrderTerm (resolveTypeOrUnknown ctx fld Nothing) dir nulls
770776

771777
-- Validates that the related resource on the order is an embedded resource,
772778
-- e.g. if `clients` is inside the `select` in /projects?order=clients(id)&select=*,clients(*),
@@ -831,7 +837,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
831837
-- where_ = [
832838
-- CoercibleStmnt (
833839
-- CoercibleFilter {
834-
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
840+
-- field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson=False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False},
835841
-- opExpr = op
836842
-- }
837843
-- )
@@ -847,7 +853,7 @@ addRelatedOrders (Node rp@ReadPlan{order,from} forest) = do
847853
-- Don't do anything to the filter if there's no embedding (a subtree) on projects. Assume it's a normal filter.
848854
--
849855
-- >>> ReadPlan.where_ . rootLabel <$> addNullEmbedFilters (readPlanTree nullOp [])
850-
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
856+
-- Right [CoercibleStmnt (CoercibleFilter {field = CoercibleField {cfName = "projects", cfJsonPath = [], cfToJson = False, cfToTsVector = Nothing, cfIRType = "", cfTransform = Nothing, cfDefault = Nothing, cfFullRow = False}, opExpr = OpExpr True (Is IsNull)})]
851857
--
852858
-- If there's an embedding on projects, then change the filter to use the internal aggregate name (`clients_projects_1`) so the filter can succeed later.
853859
--
@@ -866,7 +872,7 @@ addNullEmbedFilters (Node rp@ReadPlan{where_=curLogic} forest) = do
866872
newNullFilters rPlans = \case
867873
(CoercibleExpr b lOp trees) ->
868874
CoercibleExpr b lOp <$> (newNullFilters rPlans `traverse` trees)
869-
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _) opExpr)) ->
875+
flt@(CoercibleStmnt (CoercibleFilter (CoercibleField fld [] _ _ _ _ _ _) opExpr)) ->
870876
let foundRP = find (\ReadPlan{relName, relAlias} -> fld == fromMaybe relName relAlias) rPlans in
871877
case (foundRP, opExpr) of
872878
(Just ReadPlan{relAggAlias}, OpExpr b (Is IsNull)) -> Right $ CoercibleStmnt $ CoercibleFilterNullEmbed b relAggAlias
@@ -900,7 +906,7 @@ resolveLogicTree ctx (Stmnt flt) = CoercibleStmnt $ resolveFilter ctx flt
900906
resolveLogicTree ctx (Expr b op lts) = CoercibleExpr b op (map (resolveLogicTree ctx) lts)
901907

902908
resolveFilter :: ResolverContext -> Filter -> CoercibleFilter
903-
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld, opExpr=opExpr}
909+
resolveFilter ctx (Filter fld opExpr) = CoercibleFilter{field=resolveQueryInputField ctx fld opExpr, opExpr=opExpr}
904910

905911
-- Validates that spread embeds are only done on to-one relationships
906912
validateSpreadEmbeds :: ReadPlanTree -> Either ApiRequestError ReadPlanTree
@@ -963,7 +969,7 @@ mutatePlan mutation qi ApiRequest{iPreferences=Preferences{..}, ..} SchemaCache{
963969
resolveOrError :: ResolverContext -> Maybe Table -> FieldName -> Either ApiRequestError CoercibleField
964970
resolveOrError _ Nothing _ = Left NotFound
965971
resolveOrError ctx (Just table) field =
966-
case resolveTableFieldName table field of
972+
case resolveTableFieldName table field Nothing of
967973
CoercibleField{cfIRType=""} -> Left $ ColumnNotFound (tableName table) field
968974
cf -> Right $ withJsonParse ctx cf
969975

src/PostgREST/Plan/Types.hs

+16-10
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,27 @@ module PostgREST.Plan.Types
55
, CoercibleLogicTree(..)
66
, CoercibleFilter(..)
77
, TransformerProc
8+
, ToTsVector(..)
89
, CoercibleOrderTerm(..)
910
, RelSelectField(..)
1011
, RelJsonEmbedMode(..)
1112
, SpreadSelectField(..)
1213
) where
1314

1415
import PostgREST.ApiRequest.Types (AggregateFunction, Alias, Cast,
15-
Field, JsonPath, LogicOperator,
16-
OpExpr, OrderDirection, OrderNulls)
16+
Field, JsonPath, Language,
17+
LogicOperator, OpExpr,
18+
OrderDirection, OrderNulls)
1719

1820
import PostgREST.SchemaCache.Identifiers (FieldName)
1921

2022
import Protolude
2123

2224
type TransformerProc = Text
2325

26+
newtype ToTsVector = ToTsVector (Maybe Language)
27+
deriving (Eq, Show)
28+
2429
-- | A CoercibleField pairs the name of a query element with any type coercion information we need for some specific use case.
2530
-- |
2631
-- | As suggested by the name, it's often a reference to a field in a table but really it can be any nameable element (function parameter, calculation with an alias, etc) with a knowable type.
@@ -33,17 +38,18 @@ type TransformerProc = Text
3338
-- |
3439
-- | The type value is allowed to be the empty string. The analog here is soft type checking in programming languages: sometimes we don't need a variable to have a specified type and things will work anyhow. So the empty type variant is valid when we don't know and *don't need to know* about the specific type in some context. Note that this variation should not be used if it guarantees failure: in that case you should instead raise an error at the planning stage and bail out. For example, we can't parse JSON with `json_to_recordset` without knowing the types of each recipient field, and so error out. Using the empty string for the type would be incorrect and futile. On the other hand we use the empty type for RPC calls since type resolution isn't implemented for RPC, but it's fine because the query still works with Postgres' implicit coercion. In the future, hopefully we will support data representations across the board and then the empty type may be permanently retired.
3540
data CoercibleField = CoercibleField
36-
{ cfName :: FieldName
37-
, cfJsonPath :: JsonPath
38-
, cfToJson :: Bool
39-
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
40-
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
41-
, cfDefault :: Maybe Text
42-
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
41+
{ cfName :: FieldName
42+
, cfJsonPath :: JsonPath
43+
, cfToJson :: Bool
44+
, cfToTsVector :: Maybe ToTsVector -- ^ If the field should be converted using to_tsvector(<language>, <field>)
45+
, cfIRType :: Text -- ^ The native Postgres type of the field, the intermediate (IR) type before mapping.
46+
, cfTransform :: Maybe TransformerProc -- ^ The optional mapping from irType -> targetType.
47+
, cfDefault :: Maybe Text
48+
, cfFullRow :: Bool -- ^ True if the field represents the whole selected row. Used in spread rels: instead of COUNT(*), it does a COUNT(<row>) in order to not mix with other spreaded resources.
4349
} deriving (Eq, Show)
4450

4551
unknownField :: FieldName -> JsonPath -> CoercibleField
46-
unknownField name path = CoercibleField name path False "" Nothing Nothing False
52+
unknownField name path = CoercibleField name path False Nothing "" Nothing Nothing False
4753

4854
-- | Like an API request LogicTree, but with coercible field information.
4955
data CoercibleLogicTree

src/PostgREST/Query/QueryBuilder.hs

+1-1
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ callPlanToQuery (FunctionCall qi params arguments returnsScalar returnsSetOfScal
206206
KeyParams [] -> "FROM " <> callIt mempty
207207
KeyParams prms -> case arguments of
208208
DirectArgs args -> "FROM " <> callIt (fmtArgs prms args)
209-
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
209+
JsonArgs json -> fromJsonBodyF json ((\p -> CoercibleField (ppName p) mempty False Nothing (ppTypeMaxLength p) Nothing Nothing False) <$> prms) False True False <> ", " <>
210210
"LATERAL " <> callIt (fmtParams prms)
211211

212212
callIt :: SQL.Snippet -> SQL.Snippet

src/PostgREST/Query/SqlFragment.hs

+14-6
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ import PostgREST.Plan.Types (CoercibleField (..),
8181
CoercibleSelectField (..),
8282
RelSelectField (..),
8383
SpreadSelectField (..),
84+
ToTsVector (..),
8485
unknownField)
8586
import PostgREST.RangeQuery (NonnegRange, allRange,
8687
rangeLimit, rangeOffset)
@@ -252,10 +253,15 @@ pgFmtCallUnary :: Text -> SQL.Snippet -> SQL.Snippet
252253
pgFmtCallUnary f x = SQL.sql (encodeUtf8 f) <> "(" <> x <> ")"
253254

254255
pgFmtField :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
255-
pgFmtField table CoercibleField{cfFullRow=True} = fromQi table
256-
pgFmtField table CoercibleField{cfName=fn, cfJsonPath=[]} = pgFmtColumn table fn
257-
pgFmtField table CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson = "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
258-
| otherwise = pgFmtColumn table fn <> pgFmtJsonPath jp
256+
pgFmtField table cf = case cfToTsVector cf of
257+
Just (ToTsVector lang) -> "to_tsvector(" <> pgFmtFtsLang lang <> fmtFld <> ")"
258+
_ -> fmtFld
259+
where
260+
fmtFld = case cf of
261+
CoercibleField{cfFullRow=True} -> fromQi table
262+
CoercibleField{cfName=fn, cfJsonPath=[]} -> pgFmtColumn table fn
263+
CoercibleField{cfName=fn, cfToJson=doToJson, cfJsonPath=jp} | doToJson -> "to_jsonb(" <> pgFmtColumn table fn <> ")" <> pgFmtJsonPath jp
264+
| otherwise -> pgFmtColumn table fn <> pgFmtJsonPath jp
259265

260266
-- Select the value of a named element from a table, applying its optional coercion mapping if any.
261267
pgFmtTableCoerce :: QualifiedIdentifier -> CoercibleField -> SQL.Snippet
@@ -399,16 +405,18 @@ pgFmtFilter table (CoercibleFilter fld (OpExpr hasNot oper)) = notOp <> " " <> p
399405
[""] -> "= ANY('{}') "
400406
_ -> "= ANY (" <> pgFmtArrayLiteralForField vals fld <> ") "
401407

402-
Fts op lang val -> " " <> ftsOperator op <> "(" <> ftsLang lang <> unknownLiteral val <> ") "
408+
Fts op lang val -> " " <> ftsOperator op <> "(" <> pgFmtFtsLang lang <> unknownLiteral val <> ") "
403409
where
404-
ftsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
405410
notOp = if hasNot then "NOT" else mempty
406411
star c = if c == '*' then '%' else c
407412
fmtQuant q val = case q of
408413
Just QuantAny -> "ANY(" <> val <> ")"
409414
Just QuantAll -> "ALL(" <> val <> ")"
410415
Nothing -> val
411416

417+
pgFmtFtsLang :: Maybe Text -> SQL.Snippet
418+
pgFmtFtsLang = maybe mempty (\l -> unknownLiteral l <> ", ")
419+
412420
pgFmtJoinCondition :: JoinCondition -> SQL.Snippet
413421
pgFmtJoinCondition (JoinCondition (qi1, col1) (qi2, col2)) =
414422
pgFmtColumn qi1 col1 <> " = " <> pgFmtColumn qi2 col2

test/spec/Feature/Query/AndOrParamsSpec.hs

+12-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ spec =
8181
it "can handle is" $
8282
get "/entities?and=(name.is.null,arr.is.null)&select=id" `shouldRespondWith`
8383
[json|[{ "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }
84-
it "can handle fts" $ do
84+
it "can handle fts on tsvector columns" $ do
8585
get "/entities?or=(text_search_vector.fts.bar,text_search_vector.fts.baz)&select=id" `shouldRespondWith`
8686
[json|[{ "id": 1 }, { "id": 2 }]|] { matchHeaders = [matchContentTypeJson] }
8787
get "/tsearch?or=(text_search_vector.plfts(german).Art%20Spass, text_search_vector.plfts(french).amusant%20impossible, text_search_vector.fts(english).impossible)" `shouldRespondWith`
@@ -90,6 +90,17 @@ spec =
9090
{"text_search_vector": "'amus':5 'fair':7 'impossibl':9 'peu':4" },
9191
{"text_search_vector": "'art':4 'spass':5 'unmog':7"}
9292
]|] { matchHeaders = [matchContentTypeJson] }
93+
it "can handle fts on text and json columns" $ do
94+
get "/grandchild_entities?or=(jsonb_col.fts.bar,jsonb_col.fts.foo)&select=jsonb_col" `shouldRespondWith`
95+
[json|[
96+
{ "jsonb_col": {"a": {"b":"foo"}} },
97+
{ "jsonb_col": {"b":"bar"} }]
98+
|] { matchHeaders = [matchContentTypeJson] }
99+
get "/tsearch_to_tsvector?and=(text_search.not.plfts(german).Art%20Spass, text_search.not.plfts(french).amusant%20impossible, text_search.not.fts(english).impossible)&select=text_search" `shouldRespondWith`
100+
[json|[
101+
{ "text_search": "But also fun to do what is possible" },
102+
{ "text_search": "Fat cats ate rats" }]
103+
|] { matchHeaders = [matchContentTypeJson] }
93104
it "can handle isdistinct" $
94105
get "/entities?and=(id.gte.2,arr.isdistinct.{1,2})&select=id" `shouldRespondWith`
95106
[json|[{ "id": 3 }, { "id": 4 }]|] { matchHeaders = [matchContentTypeJson] }

0 commit comments

Comments
 (0)