Skip to content

Commit

Permalink
Merge pull request #1019 from WolframResearch/feature/build-with-abso…
Browse files Browse the repository at this point in the history
…lute-paths

Added option to build vector databases in target directory with absolute paths
  • Loading branch information
rhennigan authored Jan 9, 2025
2 parents 3990981 + 77cc3b3 commit 94dd11e
Showing 1 changed file with 42 additions and 9 deletions.
51 changes: 42 additions & 9 deletions Developer/VectorDatabases/VectorDatabaseBuilder.wl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ $incrementalBuildBatchSize = 512;
(* Default for the "Connectivity" option of BuildVectorDatabase: *)
$dbConnectivity = 16;
(* Default for the "ExpansionAdd" option of BuildVectorDatabase: *)
$dbExpansionAdd = 256;
(* Default for the "ExpansionSearch" option of BuildVectorDatabase: *)
$dbExpansionSearch = 2048;
(* Default for the "RelativePaths" option of BuildVectorDatabase. Any non-Boolean value such as Automatic
   is resolved to True or False by checkRelativePaths at call time: *)
$relativePaths = Automatic;

(* ::**************************************************************************************************************:: *)
(* ::Subsection::Closed:: *)
Expand Down Expand Up @@ -143,15 +144,17 @@ BuildVectorDatabase // ClearAll;
(* Option defaults are delayed rules (:>), so each default is read from its $-variable when the option
   value is looked up rather than when this definition is evaluated. *)
(* NOTE(review): the two consecutive "ExpansionSearch" lines look like the removed and added sides of a
   diff rendering; presumably only the comma-terminated one belongs in the file - confirm. *)
BuildVectorDatabase // Options = {
"Connectivity" :> $dbConnectivity,
"ExpansionAdd" :> $dbExpansionAdd,
"ExpansionSearch" :> $dbExpansionSearch
"ExpansionSearch" :> $dbExpansionSearch,
"RelativePaths" :> $relativePaths
};

(* Builds every vector database that has a source file. The option values are bound to the corresponding
   $-variables for the duration of the call; the per-name calls made by AssociationMap pass no options,
   so they see these values through the delayed option defaults of BuildVectorDatabase.
   Returns an association whose keys are the base file names of getVectorDBSourceFile @ All and whose
   values are the results of BuildVectorDatabase for each name. *)
(* NOTE(review): the two consecutive $dbExpansionSearch lines look like the removed and added sides of a
   diff rendering; presumably only the comma-terminated one belongs in the file - confirm. *)
BuildVectorDatabase[ All, opts: OptionsPattern[ ] ] :=
Block[
{
$dbConnectivity = OptionValue[ "Connectivity" ],
$dbExpansionAdd = OptionValue[ "ExpansionAdd" ],
$dbExpansionSearch = OptionValue[ "ExpansionSearch" ]
$dbExpansionSearch = OptionValue[ "ExpansionSearch" ],
(* Non-Boolean settings (e.g. Automatic) resolve to True for this form: *)
$relativePaths = checkRelativePaths[ OptionValue[ "RelativePaths" ], True ]
},
AssociationMap[ BuildVectorDatabase, FileBaseName /@ getVectorDBSourceFile @ All ]
];
Expand All @@ -161,16 +164,30 @@ BuildVectorDatabase[ name_String, opts: OptionsPattern[ ] ] := Enclose[
{
$dbConnectivity = OptionValue[ "Connectivity" ],
$dbExpansionAdd = OptionValue[ "ExpansionAdd" ],
$dbExpansionSearch = OptionValue[ "ExpansionSearch" ]
$dbExpansionSearch = OptionValue[ "ExpansionSearch" ],
$relativePaths = checkRelativePaths[ OptionValue[ "RelativePaths" ], True ]
},
WithCleanup[
SetDirectory @ ensureDirectory @ $vectorDBTargetDirectory,
ConfirmMatch[ buildVectorDatabase @ name, $$vectorDatabase, "Build" ],
ResetDirectory[ ]
If[ TrueQ @ $relativePaths,
WithCleanup[
SetDirectory @ ensureDirectory @ $vectorDBTargetDirectory,
ConfirmMatch[ buildVectorDatabase @ name, $$vectorDatabase, "Build" ],
ResetDirectory[ ]
],
ConfirmMatch[ buildVectorDatabase @ name, $$vectorDatabase, "Build" ]
]
]
];

(* Builds the database(s) selected by id with dir as the target directory instead of the default one.
   dir is passed through GeneralUtilities`EnsureDirectory and must then satisfy DirectoryQ; if it does
   not, the enclosing Enclose returns the resulting Failure.
   Unless "RelativePaths" is explicitly True or False, this form defaults to False (absolute paths).
   Otherwise returns whatever the two-argument form of BuildVectorDatabase returns for id. *)
BuildVectorDatabase[ id_, dir_, opts: OptionsPattern[ ] ] := Enclose[
    Block[
        {
            $vectorDBTargetDirectory = ConfirmBy[ GeneralUtilities`EnsureDirectory @ dir, DirectoryQ, "Directory" ],
            $relativePaths = checkRelativePaths[ OptionValue[ "RelativePaths" ], False ]
        },
        (* Forward the resolved setting explicitly; it is placed before opts so that it takes precedence.
           Without this, an explicit non-Boolean setting such as "RelativePaths" -> Automatic would reach
           the inner call unchanged and be re-resolved there with a default of True, contradicting the
           False default chosen above. *)
        BuildVectorDatabase[ id, "RelativePaths" -> $relativePaths, opts ]
    ]
];


buildVectorDatabase // ClearAll;

Expand All @@ -180,7 +197,12 @@ buildVectorDatabase[ name_String ] :=
loadEmbeddingCache[ ];

dir = ConfirmBy[ ensureDirectory @ { $vectorDBTargetDirectory, name }, DirectoryQ, "Directory" ];
rel = ConfirmBy[ ResourceFunction[ "RelativePath" ][ dir ], DirectoryQ, "Relative" ];

rel = If[ TrueQ @ $relativePaths,
ConfirmBy[ ResourceFunction[ "RelativePath" ][ dir ], DirectoryQ, "Relative" ],
dir
];

src = ConfirmBy[ getVectorDBSourceFile @ name, FileExistsQ, "File" ];

DeleteFile /@ FileNames[ { "*.wxf", "*.usearch" }, dir ];
Expand Down Expand Up @@ -231,7 +253,10 @@ buildVectorDatabase[ name_String ] :=
ConfirmBy[ rewriteDBData[ rel, name ], FileExistsQ, "Rewrite" ];

built = ConfirmMatch[
VectorDatabaseObject @ File @ FileNameJoin @ { rel, name <> ".wxf" },
If[ TrueQ @ $relativePaths,
VectorDatabaseObject @ File @ FileNameJoin @ { rel, name <> ".wxf" },
VectorDatabaseObject[ File @ FileNameJoin @ { dir, name <> ".wxf" }, OverwriteTarget -> True ]
],
$$vectorDatabase,
"Result"
];
Expand Down Expand Up @@ -265,6 +290,14 @@ buildVectorDatabase[ name_String ] :=
ConfirmMatch[ built, $$vectorDatabase, "Result" ]
];

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*checkRelativePaths*)
checkRelativePaths // ClearAll;
(* Resolves a "RelativePaths" setting to a Boolean:
   - an explicit True/False setting is returned as given;
   - otherwise the fallback is returned when it is True/False;
   - otherwise True. *)
checkRelativePaths[ setting_, fallback_ ] :=
    Which[
        BooleanQ @ setting,  setting,
        BooleanQ @ fallback, fallback,
        True,                True
    ];

(* ::**************************************************************************************************************:: *)
(* ::Subsubsection::Closed:: *)
(*setDBDefaults*)
Expand Down

0 comments on commit 94dd11e

Please sign in to comment.