Skip to content

Commit 0ff70ab

Browse files
committed
fix: remove package fuzzyset in favor of fuzzystrmatch-pg
WIP Signed-off-by: Taimoor Zaeem <taimoorzaeem@gmail.com>
1 parent de4ad62 commit 0ff70ab

15 files changed

Lines changed: 114 additions & 112 deletions

File tree

nix/overlays/haskell-packages.nix

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,15 @@ let
4646
# + For stack.yaml.lock, CI should report an error with the correct lock, copy/paste that one into the file
4747
# - To modify and try packages locally, see "Working with locally modified Haskell packages" in the Nix README.
4848

49-
# Before upgrading fuzzyset to 0.3, check: https://github.com/PostgREST/postgrest/issues/3329
50-
fuzzyset = prev.fuzzyset_0_2_4;
49+
# TODO: Remove once available in nixpkgs haskellPackages
50+
fuzzystrmatch-pg =
51+
prev.callHackageDirect
52+
{
53+
pkg = "fuzzystrmatch-pg";
54+
ver = "0.1.0.0";
55+
sha256 = "sha256-m0Kl0nA6lojyA4yFiQnJDzYoYAzplrI+qS7AjPQ3YeQ=";
56+
}
57+
{ };
5158

5259
# Downgrade hasql and related packages while we are still on GHC 9.4 for the static build.
5360
hasql = lib.dontCheck (lib.doJailbreak prev.hasql_1_6_4_4);

postgrest.cabal

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ library
116116
, directory >= 1.2.6 && < 1.4
117117
, either >= 4.4.1 && < 5.1
118118
, extra >= 1.7.0 && < 2.0
119-
, fuzzyset >= 0.2.4 && < 0.3
119+
, fuzzystrmatch-pg >= 0.1 && < 0.2
120120
, hasql >= 1.6.1.1 && < 1.7
121121
, hasql-dynamic-statements >= 0.3.1 && < 0.4
122122
, hasql-notifications >= 0.2.2.2 && < 0.2.3

src/PostgREST/Error.hs

Lines changed: 60 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ Module : PostgREST.Error
33
Description : PostgREST error HTTP responses
44
-}
55
{-# OPTIONS_GHC -fno-warn-orphans #-}
6-
{-# LANGUAGE NamedFieldPuns #-}
76
{-# LANGUAGE RecordWildCards #-}
87

98
module PostgREST.Error
@@ -25,7 +24,7 @@ import qualified Data.Aeson as JSON
2524
import qualified Data.ByteString.Char8 as BS
2625
import qualified Data.ByteString.Lazy as LBS
2726
import qualified Data.CaseInsensitive as CI
28-
import qualified Data.FuzzySet as Fuzzy
27+
import qualified Data.FuzzyStrMatch as Fuzzy
2928
import qualified Data.HashMap.Strict as HM
3029
import qualified Data.Map.Internal as M
3130
import qualified Data.Text as T
@@ -43,7 +42,6 @@ import PostgREST.MediaType (MediaType (..))
4342
import qualified PostgREST.MediaType as MediaType
4443

4544
import PostgREST.Config (Verbosity (..))
46-
import PostgREST.SchemaCache (SchemaCache (SchemaCache, dbTablesFuzzyIndex))
4745
import PostgREST.SchemaCache.Identifiers (QualifiedIdentifier (..),
4846
Schema)
4947
import PostgREST.SchemaCache.Relationship (Cardinality (..),
@@ -52,6 +50,7 @@ import PostgREST.SchemaCache.Relationship (Cardinality (..),
5250
RelationshipsMap)
5351
import PostgREST.SchemaCache.Routine (Routine (..),
5452
RoutineParam (..))
53+
import PostgREST.SchemaCache.Table (Table (..))
5554

5655
import PostgREST.Error.Types
5756

@@ -277,7 +276,7 @@ instance ErrorBody SchemaCacheError where
277276
where
278277
onlySingleParams = isInvPost && contentType `elem` [MTTextPlain, MTTextXML, MTOctetStream]
279278
hint (AmbiguousRpc _) = Just "Try renaming the parameters or the function itself in the database so function overloading can be resolved"
280-
hint (TableNotFound schemaName relName schemaCache) = JSON.String <$> tableNotFoundHint schemaName relName schemaCache
279+
hint (TableNotFound schemaName relName tbls) = JSON.String <$> tableNotFoundHint schemaName relName tbls
281280

282281
hint _ = Nothing
283282

@@ -303,13 +302,13 @@ instance ErrorBody SchemaCacheError where
303302
-- Just "Perhaps you meant 'roles' instead of 'role'."
304303
--
305304
-- >>> noRelBetweenHint "films" "actors" "api" rels
306-
-- Nothing
305+
-- Just "Perhaps you meant 'directors' instead of 'actors'."
307306
--
308307
-- >>> noRelBetweenHint "noclosealternative" "roles" "api" rels
309-
-- Nothing
308+
-- Just "Perhaps you meant 'films' instead of 'noclosealternative'."
310309
--
311310
-- >>> noRelBetweenHint "films" "noclosealternative" "api" rels
312-
-- Nothing
311+
-- Just "Perhaps you meant 'directors' instead of 'noclosealternative'."
313312
--
314313
-- >>> noRelBetweenHint "films" "noclosealternative" "noclosealternative" rels
315314
-- Nothing
@@ -321,11 +320,10 @@ noRelBetweenHint parent child schema allRels = ("Perhaps you meant '" <>) <$>
321320
else (<> "' instead of '" <> parent <> "'.") <$> suggestParent
322321
where
323322
findParent = HM.lookup (QualifiedIdentifier schema parent, schema) allRels
324-
fuzzySetOfParents = Fuzzy.fromList [qiName (fst p) | p <- HM.keys allRels, snd p == schema]
325-
fuzzySetOfChildren = Fuzzy.fromList [qiName (relForeignTable c) | c <- fromMaybe [] findParent]
326-
suggestParent = Fuzzy.getOne fuzzySetOfParents parent
327-
-- Do not give suggestion if the child is found in the relations (weight = 1.0)
328-
suggestChild = headMay [snd k | k <- Fuzzy.get fuzzySetOfChildren child, fst k < 1.0]
323+
parentList = [qiName (fst p) | p <- HM.keys allRels, snd p == schema]
324+
childrenList = [qiName (relForeignTable c) | c <- fromMaybe [] findParent]
325+
suggestParent = getFuzzyHint HintRelParent parent parentList
326+
suggestChild = getFuzzyHint HintRelChildren child childrenList
329327

330328
-- |
331329
-- If no function is found with the given name, it does a fuzzy search to all the functions
@@ -362,43 +360,78 @@ noRelBetweenHint parent child schema allRels = ("Perhaps you meant '" <>) <$>
362360
-- Just "Perhaps you meant to call the function api.test(attr, id)"
363361
--
364362
-- >>> noRpcHint "api" "test" ["noclosealternative"] procs procsDesc
365-
-- Nothing
363+
-- Just "Perhaps you meant to call the function api.test(attr, id)"
366364
--
367365
noRpcHint :: Text -> Text -> [Text] -> [QualifiedIdentifier] -> [Routine] -> Maybe Text
368366
noRpcHint schema procName params allProcs overloadedProcs =
369367
fmap (("Perhaps you meant to call the function " <> schema <> ".") <>) possibleProcs
370368
where
371-
fuzzySetOfProcs = Fuzzy.fromList [qiName k | k <- allProcs, qiSchema k == schema]
372-
fuzzySetOfParams = Fuzzy.fromList $ listToText <$> [[ppName prm | prm <- pdParams ov] | ov <- overloadedProcs]
369+
listOfProcs = [qiName k | k <- allProcs, qiSchema k == schema]
370+
listOfParams = listToText <$> [[ppName prm | prm <- pdParams ov] | ov <- overloadedProcs]
373371
-- Cannot do a fuzzy search like: Fuzzy.getOne [[Text]] [Text], where [[Text]] is the list of params for each
374372
-- overloaded function and [Text] the given params. This converts those lists to text to make fuzzy search possible.
375373
-- E.g. ["val", "param", "name"] into "(name, param, val)"
376374
listToText = ("(" <>) . (<> ")") . T.intercalate ", " . sort
377375
possibleProcs
378-
| null overloadedProcs = getFuzzyHint HintProcedure fuzzySetOfProcs procName
379-
| otherwise = (procName <>) <$> getFuzzyHint HintParams fuzzySetOfParams (listToText params)
376+
| null overloadedProcs = getFuzzyHint HintProcedure procName listOfProcs
377+
| otherwise = (procName <>) <$> getFuzzyHint HintParams (listToText params) listOfParams
380378

381379
-- |
382380
-- Do a fuzzy search in all tables in the same schema and return closest result
383-
tableNotFoundHint :: Text -> Text -> SchemaCache -> Maybe Text
384-
tableNotFoundHint schema tblName SchemaCache{dbTablesFuzzyIndex}
381+
tableNotFoundHint :: Text -> Text -> [Table] -> Maybe Text
382+
tableNotFoundHint schema tblName tblList
385383
= fmap (\tbl -> "Perhaps you meant the table '" <> schema <> "." <> tbl <> "'") perhapsTable
386384
where
387-
perhapsTable = (\fuzzySet -> getFuzzyHint HintTable fuzzySet tblName) =<< HM.lookup schema dbTablesFuzzyIndex
385+
perhapsTable =
386+
if length tblList < maxDbTablesForFuzzySearch
387+
then getFuzzyHint HintTable tblName tblNames
388+
else Nothing
389+
tblNames = [ tableName tbl | tbl <- tblList, tableSchema tbl == schema]
390+
maxDbTablesForFuzzySearch = 500
388391

389392
data HintType
390393
= HintTable
391394
| HintProcedure
392395
| HintParams
396+
| HintRelParent
397+
| HintRelChildren
398+
deriving Eq
393399

394-
-- | Get hint using Fuzzy Search with at least 0.75 similarity score
395-
getFuzzyHint :: HintType -> Fuzzy.FuzzySet -> Text -> Maybe Text
396-
getFuzzyHint hintType =
397-
let minScore = 0.75 :: Double -- used for table and procedure name hints
400+
-- | Get Fuzzy Hint comparing name with a list of names
401+
getFuzzyHint :: HintType -> Text -> [Text] -> Maybe Text
402+
getFuzzyHint hintType name nameList =
403+
let
404+
maxDistanceForTableAndProc = 3
398405
in case hintType of
399-
HintTable -> Fuzzy.getOneWithMinScore minScore
400-
HintProcedure -> Fuzzy.getOneWithMinScore minScore
401-
HintParams -> Fuzzy.getOne -- For params, we stick to `getOne` which defaults to 0.33 min score, not a security risk to reveal params
406+
-- TODO: Refactor and make it DRY
407+
HintTable -> checkLevenshteinDistance name nameList maxDistanceForTableAndProc
408+
HintProcedure -> checkLevenshteinDistance name nameList maxDistanceForTableAndProc
409+
HintParams -> checkMinimumLevenshteinDistance name nameList Nothing maxInt
410+
HintRelParent -> checkMinimumLevenshteinDistance name nameList Nothing maxInt
411+
HintRelChildren -> checkMinimumLevenshteinDistance name nameList Nothing maxInt
412+
where
413+
-- |
414+
-- Check Levenshtein Distance and return hint lower than max distance
415+
checkLevenshteinDistance :: Text -> [Text] -> Int -> Maybe Text
416+
checkLevenshteinDistance _ [] _ = Nothing
417+
checkLevenshteinDistance identName (suggest:suggests) dist =
418+
case Fuzzy.levenshteinLessEqual identName suggest dist of
419+
Just _ -> Just suggest
420+
Nothing -> checkLevenshteinDistance identName suggests dist
421+
422+
-- |
423+
-- Check Levenshtein Distance and return hint with minimum distance
424+
checkMinimumLevenshteinDistance :: Text -> [Text] -> Maybe Text -> Int -> Maybe Text
425+
checkMinimumLevenshteinDistance _ [] Nothing _ = Nothing
426+
checkMinimumLevenshteinDistance _ [] (Just suggest) _ = Just suggest
427+
checkMinimumLevenshteinDistance identName (suggest:suggests) currentSuggest minDist =
428+
let dist = Fuzzy.levenshtein identName suggest
429+
in if dist < minDist
430+
then
431+
if dist == 0 && hintType == HintRelChildren -- Do not give suggestion if the child is found in the relations (dist = 0)
432+
then checkMinimumLevenshteinDistance identName suggests currentSuggest minDist -- Go with current suggestion
433+
else checkMinimumLevenshteinDistance identName suggests (Just suggest) dist -- Update suggestion
434+
else checkMinimumLevenshteinDistance identName suggests currentSuggest minDist -- Go with current suggestion
402435

403436
compressedRel :: Relationship -> JSON.Value
404437
-- An ambiguousness error cannot happen for computed relationships TODO refactor so this mempty is not needed

src/PostgREST/Error/Types.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ module PostgREST.Error.Types
2020
import qualified Hasql.Pool as SQL
2121

2222
import PostgREST.MediaType (MediaType (..))
23-
import PostgREST.SchemaCache (SchemaCache (..))
2423
import PostgREST.SchemaCache.Identifiers (QualifiedIdentifier (..))
2524
import PostgREST.SchemaCache.Relationship (Relationship (..),
2625
RelationshipsMap)
2726
import PostgREST.SchemaCache.Routine (Routine (..))
27+
import PostgREST.SchemaCache.Table (Table (..))
2828
import Protolude
2929

3030
data Error
@@ -85,7 +85,7 @@ data SchemaCacheError
8585
| NoRelBetween Text Text (Maybe Text) Text RelationshipsMap
8686
| NoRpc Text Text [Text] MediaType Bool [QualifiedIdentifier] [Routine]
8787
| ColumnNotFound Text Text
88-
| TableNotFound Text Text SchemaCache
88+
| TableNotFound Text Text [Table]
8989
deriving Show
9090

9191
-- JWT ERRORS: PGRST3XX

src/PostgREST/Plan.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -811,9 +811,9 @@ validateAggFunctions aggFunctionsAllowed (Node rp@ReadPlan {select} forest)
811811

812812
-- | Lookup table in the schema cache before creating read plan
813813
findTable :: QualifiedIdentifier -> SchemaCache -> Either Error QualifiedIdentifier
814-
findTable qi@QualifiedIdentifier{..} sc@SchemaCache{dbTables} =
814+
findTable qi@QualifiedIdentifier{..} SchemaCache{dbTables} =
815815
case HM.lookup qi dbTables of
816-
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName sc
816+
Nothing -> Left $ SchemaCacheErr $ TableNotFound qiSchema qiName (HM.elems dbTables)
817817
Just _ -> Right qi
818818

819819
addFilters :: ResolverContext -> ApiRequest -> ReadPlanTree -> Either Error ReadPlanTree

src/PostgREST/Response.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,10 @@ actionResponse (MaybeDbResult InspectPlan{ipHdrsOnly=headersOnly} body) ApiReque
213213
in
214214
Right $ PgrstResponse HTTP.status200 (MediaType.toContentType MTOpenAPI : cLHeader ++ maybeToList (profileHeader iSchema iNegotiatedByProfile)) rsBody
215215

216-
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ sc@SchemaCache{dbTables} =
216+
actionResponse (NoDbResult (RelInfoPlan qi@QualifiedIdentifier{..})) _ _ _ SchemaCache{dbTables} =
217217
case HM.lookup qi dbTables of
218218
Just tbl -> respondInfo $ allowH tbl
219-
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName sc
219+
Nothing -> Left $ Error.SchemaCacheErr $ Error.TableNotFound qiSchema qiName (HM.elems dbTables)
220220
where
221221
allowH table =
222222
let hasPK = not . null $ tablePKCols table in

src/PostgREST/SchemaCache.hs

Lines changed: 12 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ These queries are executed once at startup or when PostgREST is reloaded.
2020

2121
module PostgREST.SchemaCache
2222
( SchemaCache(..)
23-
, TablesFuzzyIndex
2423
, querySchemaCache
2524
, showSummary
2625
, decodeFuncs
@@ -72,29 +71,22 @@ import PostgREST.SchemaCache.Table (Column (..), ColumnMap,
7271

7372
import qualified PostgREST.MediaType as MediaType
7473

75-
import Control.Arrow ((&&&))
76-
import qualified Data.FuzzySet as Fuzzy
77-
import Protolude
78-
import System.IO.Unsafe (unsafePerformIO)
79-
80-
type TablesFuzzyIndex = HM.HashMap Schema Fuzzy.FuzzySet
74+
import Control.Arrow ((&&&))
75+
import Protolude
76+
import System.IO.Unsafe (unsafePerformIO)
8177

8278
data SchemaCache = SchemaCache
83-
{ dbTables :: TablesMap
84-
, dbRelationships :: RelationshipsMap
85-
, dbRoutines :: RoutineMap
86-
, dbRepresentations :: RepresentationsMap
87-
, dbMediaHandlers :: MediaHandlerMap
88-
, dbTimezones :: TimezoneNames
89-
-- Memoized fuzzy index of table names per schema to support approximate matching
90-
-- Since index construction can be expensive, we build it once and store in the SchemaCache
91-
-- Haskell lazy evaluation ensures it's only built on first use and memoized afterwards
92-
, dbTablesFuzzyIndex :: TablesFuzzyIndex
93-
, dbQueryTimings :: Maybe QueryTimings -- ^ cached time for the time each query took when debugging
79+
{ dbTables :: TablesMap
80+
, dbRelationships :: RelationshipsMap
81+
, dbRoutines :: RoutineMap
82+
, dbRepresentations :: RepresentationsMap
83+
, dbMediaHandlers :: MediaHandlerMap
84+
, dbTimezones :: TimezoneNames
85+
, dbQueryTimings :: Maybe QueryTimings -- ^ cached time for the time each query took when debugging
9486
} deriving (Show)
9587

9688
instance JSON.ToJSON SchemaCache where
97-
toJSON (SchemaCache tabs rels routs reps hdlers tzs _ _) = JSON.object [
89+
toJSON (SchemaCache tabs rels routs reps hdlers tzs _) = JSON.object [
9890
"dbTables" .= JSON.toJSON tabs
9991
, "dbRelationships" .= JSON.toJSON rels
10092
, "dbRoutines" .= JSON.toJSON routs
@@ -104,7 +96,7 @@ instance JSON.ToJSON SchemaCache where
10496
]
10597

10698
showSummary :: SchemaCache -> Text
107-
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs _ _) =
99+
showSummary (SchemaCache tbls rels routs reps mediaHdlrs tzs _) =
108100
T.intercalate ", "
109101
[ show (HM.size tbls) <> " Relations"
110102
, show (HM.size rels) <> " Relationships"
@@ -152,9 +144,6 @@ data KeyDep
152144
-- | A SQL query that can be executed independently
153145
type SqlQuery = ByteString
154146

155-
maxDbTablesForFuzzySearch :: Int
156-
maxDbTablesForFuzzySearch = 500
157-
158147
querySchemaCache :: AppConfig -> SQL.Transaction SchemaCache
159148
querySchemaCache conf@AppConfig{..} = do
160149
SQL.sql "set local schema ''" -- This voids the search path. The following queries need this for getting the fully qualified name(schema.name) of every db object
@@ -189,11 +178,6 @@ querySchemaCache conf@AppConfig{..} = do
189178
, dbRepresentations = reps
190179
, dbMediaHandlers = HM.union mHdlers initialMediaHandlers -- the custom handlers will override the initial ones
191180
, dbTimezones = tzones
192-
193-
, dbTablesFuzzyIndex =
194-
-- Only build fuzzy index for schemas with a reasonable number of tables
195-
-- Fuzzy.FuzzySet is memory heavy we just don't use it for large schemas
196-
Fuzzy.fromList <$> HM.filter ((< maxDbTablesForFuzzySearch) . length) (HM.fromListWith (<>) ((qiSchema &&& pure . qiName) <$> HM.keys tabsWViewsPks))
197181
, dbQueryTimings = qsTime
198182
}
199183
where
@@ -234,7 +218,6 @@ removeInternal schemas dbStruct =
234218
, dbRepresentations = dbRepresentations dbStruct -- no need to filter, not directly exposed through the API
235219
, dbMediaHandlers = dbMediaHandlers dbStruct
236220
, dbTimezones = dbTimezones dbStruct
237-
, dbTablesFuzzyIndex = dbTablesFuzzyIndex dbStruct
238221
, dbQueryTimings = dbQueryTimings dbStruct
239222
}
240223
where

stack.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ nix:
99

1010
extra-deps:
1111
- configurator-pg-0.2.11
12-
- fuzzyset-0.2.4
12+
- fuzzystrmatch-pg-0.1.0.0
1313
- hasql-1.6.4.4
1414
- hasql-dynamic-statements-0.3.1.5
1515
- hasql-implicits-0.1.1.3
@@ -25,5 +25,5 @@ extra-deps:
2525

2626
allow-newer: true
2727
allow-newer-deps:
28-
- fuzzyset
28+
- fuzzystrmatch-pg
2929
- hasql

stack.yaml.lock

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@ packages:
1212
original:
1313
hackage: configurator-pg-0.2.11
1414
- completed:
15-
hackage: fuzzyset-0.2.4@sha256:f1b6de8bf33277bf6255207541d65028f1f1ea93af5541b654c86b5674995485,1618
15+
hackage: fuzzystrmatch-pg-0.1.0.0@sha256:f676403cfaa4ce0b91386c649b863391115ff957d5781c172093d0b1452b8793,1952
1616
pantry-tree:
17-
sha256: cee68e8d88f530e9e0588b81b260236936fe3318ef9a66e9f43f680b4cd5f76e
18-
size: 574
17+
sha256: a33408683208cfd8351a43c815ccd8773006ef3bc56cff66c4b15a94a8570e63
18+
size: 485
1919
original:
20-
hackage: fuzzyset-0.2.4
20+
hackage: fuzzystrmatch-pg-0.1.0.0
2121
- completed:
2222
hackage: hasql-1.6.4.4@sha256:a26b346aaf33b903f011f8c47a1a1230ea2b0aa1d8325aaf779da425d6c076c5,4391
2323
pantry-tree:
@@ -95,6 +95,13 @@ packages:
9595
size: 736
9696
original:
9797
hackage: text-builder-dev-0.3.10
98+
- completed:
99+
hackage: warp-3.4.13@sha256:ccd1fb8765166ca31928635fffdab85569b7a0f2a81cc11c9a5b91eab663eda6,10066
100+
pantry-tree:
101+
sha256: dfe50280b7d9549f7eebedc35f62d633ecb185ba0d9686146bb39074c3055df5
102+
size: 4175
103+
original:
104+
hackage: warp-3.4.13
98105
snapshots:
99106
- completed:
100107
sha256: 655e468f774beee1badf07dc4c45fb50288d5c66ce7bef6f487b7f92891a90b0

0 commit comments

Comments
 (0)