Skip to content

Commit c508883

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to ecde6b6514
1 parent a3be833 commit c508883

31 files changed

+179
-81
lines changed

src/duckdb/extension/json/include/json_multi_file_info.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,7 @@ class JSONFileReaderOptions : public BaseFileReaderOptions {
1919
};
2020

2121
struct JSONMultiFileInfo : MultiFileReaderInterface {
22-
static unique_ptr<MultiFileReaderInterface> InitializeInterface(ClientContext &context, MultiFileReader &reader,
23-
MultiFileList &file_list);
22+
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
2423

2524
unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
2625
optional_ptr<TableFunctionInfo> info) override;
@@ -49,6 +48,7 @@ struct JSONMultiFileInfo : MultiFileReaderInterface {
4948
void FinishReading(ClientContext &context, GlobalTableFunctionState &global_state,
5049
LocalTableFunctionState &local_state) override;
5150
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
51+
FileGlobInput GetGlobInput() override;
5252
};
5353

5454
} // namespace duckdb

src/duckdb/extension/json/json_multi_file_info.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44

55
namespace duckdb {
66

7-
unique_ptr<MultiFileReaderInterface>
8-
JSONMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) {
7+
unique_ptr<MultiFileReaderInterface> JSONMultiFileInfo::CreateInterface(ClientContext &context) {
98
return make_uniq<JSONMultiFileInfo>();
109
}
1110

@@ -579,4 +578,8 @@ optional_idx JSONMultiFileInfo::MaxThreads(const MultiFileBindData &bind_data, c
579578
return json_data.max_threads;
580579
}
581580

581+
FileGlobInput JSONMultiFileInfo::GetGlobInput() {
582+
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "json");
583+
}
584+
582585
} // namespace duckdb

src/duckdb/extension/parquet/include/parquet_multi_file_info.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,8 +24,7 @@ class ParquetFileReaderOptions : public BaseFileReaderOptions {
2424
};
2525

2626
struct ParquetMultiFileInfo : MultiFileReaderInterface {
27-
static unique_ptr<MultiFileReaderInterface> InitializeInterface(ClientContext &context, MultiFileReader &reader,
28-
MultiFileList &file_list);
27+
static unique_ptr<MultiFileReaderInterface> CreateInterface(ClientContext &context);
2928

3029
unique_ptr<BaseFileReaderOptions> InitializeOptions(ClientContext &context,
3130
optional_ptr<TableFunctionInfo> info) override;
@@ -56,6 +55,7 @@ struct ParquetMultiFileInfo : MultiFileReaderInterface {
5655
unique_ptr<NodeStatistics> GetCardinality(const MultiFileBindData &bind_data, idx_t file_count) override;
5756
void GetVirtualColumns(ClientContext &context, MultiFileBindData &bind_data, virtual_column_map_t &result) override;
5857
unique_ptr<MultiFileReaderInterface> Copy() override;
58+
FileGlobInput GetGlobInput() override;
5959
};
6060

6161
class ParquetScanFunction {

src/duckdb/extension/parquet/parquet_multi_file_info.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,7 @@ static void BindSchema(ClientContext &context, vector<LogicalType> &return_types
131131
D_ASSERT(names.size() == return_types.size());
132132
}
133133

134-
unique_ptr<MultiFileReaderInterface>
135-
ParquetMultiFileInfo::InitializeInterface(ClientContext &context, MultiFileReader &reader, MultiFileList &file_list) {
134+
unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::CreateInterface(ClientContext &context) {
136135
return make_uniq<ParquetMultiFileInfo>();
137136
}
138137

@@ -244,10 +243,10 @@ static unique_ptr<FunctionData> ParquetScanDeserialize(Deserializer &deserialize
244243
for (auto &path : files) {
245244
file_path.emplace_back(path);
246245
}
246+
FileGlobInput input(FileGlobOptions::FALLBACK_GLOB, "parquet");
247247

248248
auto multi_file_reader = MultiFileReader::Create(function);
249-
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path),
250-
FileGlobOptions::DISALLOW_EMPTY);
249+
auto file_list = multi_file_reader->CreateFileList(context, Value::LIST(LogicalType::VARCHAR, file_path), input);
251250
auto parquet_options = make_uniq<ParquetFileReaderOptions>(std::move(serialization.parquet_options));
252251
auto interface = make_uniq<ParquetMultiFileInfo>();
253252
auto bind_data = MultiFileFunction<ParquetMultiFileInfo>::MultiFileBindInternal(
@@ -588,4 +587,8 @@ unique_ptr<MultiFileReaderInterface> ParquetMultiFileInfo::Copy() {
588587
return make_uniq<ParquetMultiFileInfo>();
589588
}
590589

590+
FileGlobInput ParquetMultiFileInfo::GetGlobInput() {
591+
return FileGlobInput(FileGlobOptions::FALLBACK_GLOB, "parquet");
592+
}
593+
591594
} // namespace duckdb

src/duckdb/src/common/enum_util.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,19 +1849,20 @@ FileExpandResult EnumUtil::FromString<FileExpandResult>(const char *value) {
18491849
const StringUtil::EnumStringLiteral *GetFileGlobOptionsValues() {
18501850
static constexpr StringUtil::EnumStringLiteral values[] {
18511851
{ static_cast<uint32_t>(FileGlobOptions::DISALLOW_EMPTY), "DISALLOW_EMPTY" },
1852-
{ static_cast<uint32_t>(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" }
1852+
{ static_cast<uint32_t>(FileGlobOptions::ALLOW_EMPTY), "ALLOW_EMPTY" },
1853+
{ static_cast<uint32_t>(FileGlobOptions::FALLBACK_GLOB), "FALLBACK_GLOB" }
18531854
};
18541855
return values;
18551856
}
18561857

18571858
template<>
18581859
const char* EnumUtil::ToChars<FileGlobOptions>(FileGlobOptions value) {
1859-
return StringUtil::EnumToString(GetFileGlobOptionsValues(), 2, "FileGlobOptions", static_cast<uint32_t>(value));
1860+
return StringUtil::EnumToString(GetFileGlobOptionsValues(), 3, "FileGlobOptions", static_cast<uint32_t>(value));
18601861
}
18611862

18621863
template<>
18631864
FileGlobOptions EnumUtil::FromString<FileGlobOptions>(const char *value) {
1864-
return static_cast<FileGlobOptions>(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 2, "FileGlobOptions", value));
1865+
return static_cast<FileGlobOptions>(StringUtil::StringToEnum(GetFileGlobOptionsValues(), 3, "FileGlobOptions", value));
18651866
}
18661867

18671868
const StringUtil::EnumStringLiteral *GetFileLockTypeValues() {

src/duckdb/src/common/file_system.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,17 @@ string FileSystem::ExtractBaseName(const string &path) {
333333
return vec[0];
334334
}
335335

336+
string FileSystem::ExtractExtension(const string &path) {
337+
if (path.empty()) {
338+
return string();
339+
}
340+
auto vec = StringUtil::Split(ExtractName(path), ".");
341+
if (vec.size() < 2) {
342+
return string();
343+
}
344+
return vec.back();
345+
}
346+
336347
string FileSystem::GetHomeDirectory(optional_ptr<FileOpener> opener) {
337348
// read the home_directory setting first, if it is set
338349
if (opener) {
@@ -626,7 +637,7 @@ static string LookupExtensionForPattern(const string &pattern) {
626637
return "";
627638
}
628639

629-
vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext &context, FileGlobOptions options) {
640+
vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext &context, const FileGlobInput &input) {
630641
auto result = Glob(pattern);
631642
if (result.empty()) {
632643
string required_extension = LookupExtensionForPattern(pattern);
@@ -648,9 +659,19 @@ vector<OpenFileInfo> FileSystem::GlobFiles(const string &pattern, ClientContext
648659
throw InternalException("Extension load \"%s\" did not throw but somehow the extension was not loaded",
649660
required_extension);
650661
}
651-
return GlobFiles(pattern, context, options);
662+
return GlobFiles(pattern, context, input);
663+
}
664+
if (input.behavior == FileGlobOptions::FALLBACK_GLOB && !HasGlob(pattern)) {
665+
// if we have no glob in the pattern and we have an extension, we try to glob
666+
if (!HasGlob(pattern)) {
667+
if (input.extension.empty()) {
668+
throw InternalException("FALLBACK_GLOB requires an extension to be specified");
669+
}
670+
string new_pattern = JoinPath(JoinPath(pattern, "**"), "*." + input.extension);
671+
return GlobFiles(new_pattern, context, FileGlobOptions::DISALLOW_EMPTY);
672+
}
652673
}
653-
if (options == FileGlobOptions::DISALLOW_EMPTY) {
674+
if (input.behavior == FileGlobOptions::FALLBACK_GLOB || input.behavior == FileGlobOptions::DISALLOW_EMPTY) {
654675
throw IOException("No files found that match the pattern \"%s\"", pattern);
655676
}
656677
}

src/duckdb/src/common/multi_file/multi_file_function.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ namespace duckdb {
66
MultiFileReaderInterface::~MultiFileReaderInterface() {
77
}
88

9+
void MultiFileReaderInterface::InitializeInterface(ClientContext &context, MultiFileReader &reader,
10+
MultiFileList &file_list) {
11+
}
12+
913
void MultiFileReaderInterface::FinalizeCopyBind(ClientContext &context, BaseFileReaderOptions &options,
1014
const vector<string> &expected_names,
1115
const vector<LogicalType> &expected_types) {
@@ -41,4 +45,8 @@ unique_ptr<MultiFileReaderInterface> MultiFileReaderInterface::Copy() {
4145
throw InternalException("MultiFileReaderInterface::Copy is not implemented for this file interface");
4246
}
4347

48+
FileGlobInput MultiFileReaderInterface::GetGlobInput() {
49+
return FileGlobOptions::DISALLOW_EMPTY;
50+
}
51+
4452
} // namespace duckdb

src/duckdb/src/common/multi_file/multi_file_list.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,12 @@ const OpenFileInfo &MultiFileListIterationHelper::MultiFileListIterator::operato
136136
//===--------------------------------------------------------------------===//
137137
// MultiFileList
138138
//===--------------------------------------------------------------------===//
139+
MultiFileList::MultiFileList(vector<OpenFileInfo> paths, FileGlobInput glob_input_p)
140+
: paths(std::move(paths)), glob_input(std::move(glob_input_p)) {
141+
}
142+
139143
MultiFileList::MultiFileList(vector<OpenFileInfo> paths, FileGlobOptions options)
140-
: paths(std::move(paths)), glob_options(options) {
144+
: MultiFileList(std::move(paths), FileGlobInput(options)) {
141145
}
142146

143147
MultiFileList::~MultiFileList() {
@@ -270,8 +274,8 @@ idx_t SimpleMultiFileList::GetTotalFileCount() {
270274
//===--------------------------------------------------------------------===//
271275
// GlobMultiFileList
272276
//===--------------------------------------------------------------------===//
273-
GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector<OpenFileInfo> paths_p, FileGlobOptions options)
274-
: MultiFileList(std::move(paths_p), options), context(context_p), current_path(0) {
277+
GlobMultiFileList::GlobMultiFileList(ClientContext &context_p, vector<OpenFileInfo> paths_p, FileGlobInput glob_input)
278+
: MultiFileList(std::move(paths_p), std::move(glob_input)), context(context_p), current_path(0) {
275279
}
276280

277281
unique_ptr<MultiFileList> GlobMultiFileList::ComplexFilterPushdown(ClientContext &context_p,
@@ -369,7 +373,7 @@ bool GlobMultiFileList::ExpandPathInternal(idx_t &current_path, vector<OpenFileI
369373
}
370374

371375
auto &fs = FileSystem::GetFileSystem(context);
372-
auto glob_files = fs.GlobFiles(paths[current_path].path, context, glob_options);
376+
auto glob_files = fs.GlobFiles(paths[current_path].path, context, glob_input);
373377
std::sort(glob_files.begin(), glob_files.end());
374378
result.insert(result.end(), glob_files.begin(), glob_files.end());
375379

src/duckdb/src/common/multi_file/multi_file_reader.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,22 +116,22 @@ vector<string> MultiFileReader::ParsePaths(const Value &input) {
116116
}
117117

118118
shared_ptr<MultiFileList> MultiFileReader::CreateFileList(ClientContext &context, const vector<string> &paths,
119-
FileGlobOptions options) {
119+
const FileGlobInput &glob_input) {
120120
vector<OpenFileInfo> open_files;
121121
for (auto &path : paths) {
122122
open_files.emplace_back(path);
123123
}
124-
auto res = make_uniq<GlobMultiFileList>(context, std::move(open_files), options);
125-
if (res->GetExpandResult() == FileExpandResult::NO_FILES && options == FileGlobOptions::DISALLOW_EMPTY) {
124+
auto res = make_uniq<GlobMultiFileList>(context, std::move(open_files), glob_input);
125+
if (res->GetExpandResult() == FileExpandResult::NO_FILES && glob_input.behavior != FileGlobOptions::ALLOW_EMPTY) {
126126
throw IOException("%s needs at least one file to read", function_name);
127127
}
128128
return std::move(res);
129129
}
130130

131131
shared_ptr<MultiFileList> MultiFileReader::CreateFileList(ClientContext &context, const Value &input,
132-
FileGlobOptions options) {
132+
const FileGlobInput &glob_input) {
133133
auto paths = ParsePaths(input);
134-
return CreateFileList(context, paths, options);
134+
return CreateFileList(context, paths, glob_input);
135135
}
136136

137137
bool MultiFileReader::ParseOption(const string &key, const Value &val, MultiFileOptions &options,
@@ -645,6 +645,10 @@ void MultiFileReader::PruneReaders(MultiFileBindData &data, MultiFileList &file_
645645
}
646646
}
647647

648+
FileGlobInput MultiFileReader::GetGlobInput(MultiFileReaderInterface &interface) {
649+
return interface.GetGlobInput();
650+
}
651+
648652
HivePartitioningIndex::HivePartitioningIndex(string value_p, idx_t index) : value(std::move(value_p)), index(index) {
649653
}
650654

src/duckdb/src/common/radix_partitioning.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ RETURN_TYPE RadixBitsSwitch(const idx_t radix_bits, ARGS &&... args) {
5151
case 10:
5252
return OP::template Operation<10>(std::forward<ARGS>(args)...);
5353
case 11:
54-
return OP::template Operation<10>(std::forward<ARGS>(args)...);
54+
return OP::template Operation<11>(std::forward<ARGS>(args)...);
5555
case 12:
56-
return OP::template Operation<10>(std::forward<ARGS>(args)...);
56+
return OP::template Operation<12>(std::forward<ARGS>(args)...);
5757
default:
5858
throw InternalException(
5959
"radix_bits higher than RadixPartitioning::MAX_RADIX_BITS encountered in RadixBitsSwitch");

0 commit comments

Comments
 (0)