Skip to content

Commit

Permalink
Add type specification support while creating vectors from list (face…
Browse files Browse the repository at this point in the history
…bookincubator#4803)

Summary:
This PR adds support for specifying the data type when creating a vector from a Python list. With this change, users can also create empty vectors and vectors that contain only nulls.

Pull Request resolved: facebookincubator#4803

Reviewed By: mbasmanova

Differential Revision: D47636606

Pulled By: kgpai

fbshipit-source-id: dae0f2735c95e771d1d5fd2c7dd7d7054edaad43
  • Loading branch information
sanjibansg authored and facebook-github-bot committed Aug 2, 2023
1 parent 608343d commit abd74d8
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 4 deletions.
14 changes: 14 additions & 0 deletions pyvelox/pyvelox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,20 @@ static VectorPtr pyListToVector(
variantsToFlatVector, first_kind, variants, pool);
}

// Builds a vector from a Python list using an explicitly supplied element
// type instead of inferring it from the list contents.
//
// Each list element is converted with pyToVariant(element, dtype); the
// resulting variants are then handed to variantsToFlatVector, dispatched on
// dtype.kind().
//
// @param list  Python list whose elements are converted one by one.
// @param dtype Requested element type; only its kind() is consulted here.
// @param pool  Memory pool forwarded to variantsToFlatVector.
// @return The vector produced by variantsToFlatVector for dtype.kind().
static VectorPtr pyListToVector(
    const py::list& list,
    const facebook::velox::Type& dtype,
    facebook::velox::memory::MemoryPool* pool) {
  std::vector<velox::variant> converted;
  converted.reserve(list.size());

  for (const auto& element : list) {
    converted.emplace_back(pyToVariant(element, dtype));
  }

  const auto kind = dtype.kind();
  return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
      variantsToFlatVector, kind, converted, pool);
}

template <typename NativeType>
static py::object getItemFromSimpleVector(
SimpleVectorPtr<NativeType>& vector,
Expand Down
62 changes: 58 additions & 4 deletions pyvelox/pyvelox.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,46 @@ inline velox::variant pyToVariant(const py::handle& obj) {
}
}

// Converts a Python object to a velox::variant of the kind named by `dtype`.
//
// A Python None is turned into velox::variant(dtype.kind()) before the kind is
// inspected, so None never reaches the per-kind conversion. Any other object
// is forwarded to the pyToVariant<KIND> specialization for the requested kind.
//
// @param obj   Python object to convert.
// @param dtype Requested type; only dtype.kind() is used.
// @throws py::type_error when dtype.kind() is not one of the kinds listed in
//         the switch below and `obj` is not None.
inline velox::variant pyToVariant(const py::handle& obj, const Type& dtype) {
  const auto kind = dtype.kind();

  // None is handled up front, independently of the requested kind.
  if (obj.is_none()) {
    return velox::variant(kind);
  }

  switch (kind) {
    case TypeKind::BOOLEAN:
      return pyToVariant<TypeKind::BOOLEAN>(obj);
    case TypeKind::TINYINT:
      return pyToVariant<TypeKind::TINYINT>(obj);
    case TypeKind::SMALLINT:
      return pyToVariant<TypeKind::SMALLINT>(obj);
    case TypeKind::INTEGER:
      return pyToVariant<TypeKind::INTEGER>(obj);
    case TypeKind::BIGINT:
      return pyToVariant<TypeKind::BIGINT>(obj);
    case TypeKind::REAL:
      return pyToVariant<TypeKind::REAL>(obj);
    case TypeKind::DOUBLE:
      return pyToVariant<TypeKind::DOUBLE>(obj);
    case TypeKind::VARCHAR:
      return pyToVariant<TypeKind::VARCHAR>(obj);
    case TypeKind::VARBINARY:
      return pyToVariant<TypeKind::VARBINARY>(obj);
    case TypeKind::TIMESTAMP:
      return pyToVariant<TypeKind::TIMESTAMP>(obj);
    default:
      throw py::type_error("Unsupported type supplied");
  }
}

static VectorPtr pyToConstantVector(
const py::handle& obj,
vector_size_t length,
Expand All @@ -84,6 +124,11 @@ static inline VectorPtr pyListToVector(
const py::list& list,
facebook::velox::memory::MemoryPool* pool);

// Forward declaration of the pyListToVector overload that takes the element
// type explicitly.
//   list  - Python list to convert.
//   dtype - requested element type of the resulting vector.
//   pool  - memory pool passed through to the implementation.
// NOTE(review): declared `static inline` here; confirm the definition in the
// .cpp file uses matching linkage specifiers.
static inline VectorPtr pyListToVector(
const py::list& list,
const Type& dtype,
facebook::velox::memory::MemoryPool* pool);

template <TypeKind T>
static VectorPtr createDictionaryVector(
BufferPtr baseVector,
Expand Down Expand Up @@ -416,10 +461,19 @@ static void addVectorBindings(
checkBounds(indices, idx);
return indices.indices->as<vector_size_t>()[idx];
});

// Python binding `from_list(list)`: converts the list with the two-argument
// pyListToVector overload, using the pool of the PyVeloxContext singleton.
m.def("from_list", [](const py::list& list) {
  facebook::velox::memory::MemoryPool* pool =
      PyVeloxContext::getSingletonInstance().pool();
  return pyListToVector(list, pool);
});
// Python binding `from_list(list, dtype=None)`.
//
// When `dtype` is omitted or None, the list is converted with the
// pyListToVector overload that takes no type; otherwise the supplied type is
// forwarded to the typed overload. Both paths use the pool of the
// PyVeloxContext singleton.
//
// pybind11 converts a Python None into a null pointer for pointer-typed
// arguments, so a plain nullptr test is the complete "no dtype given" check;
// there is no need to cast *dtype back to a Python object to look for None.
// The Python-visible default comes from `py::arg("dtype") = nullptr`; a
// default on the lambda parameter itself would never be used by pybind11.
m.def(
    "from_list",
    [](const py::list& list, const Type* dtype) {
      facebook::velox::memory::MemoryPool* pool =
          PyVeloxContext::getSingletonInstance().pool();
      if (dtype == nullptr) {
        return pyListToVector(list, pool);
      }
      return pyListToVector(list, *dtype, pool);
    },
    py::arg("list"),
    py::arg("dtype") = nullptr);
m.def(
"constant_vector",
[](const py::handle& obj, vector_size_t length, TypePtr type) {
Expand Down
30 changes: 30 additions & 0 deletions pyvelox/test/test_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,36 @@ def test_from_list(self):
with self.assertRaises(ValueError):
pv.from_list([])

def test_from_list_with_type(self):
    """Exercise pv.from_list when an explicit dtype is supplied.

    Covers: conversion to the requested kind, lists containing only None,
    empty lists, a conversion that must fail, REAL conversion, and passing
    dtype as a keyword argument.
    """
    list_a = [0, 1, 3]
    a = pv.from_list(list_a, pv.BooleanType())
    self.assertEqual(a.typeKind().name, "BOOLEAN")
    # Guard against the element loop passing vacuously on a short vector.
    self.assertEqual(len(a), len(list_a))
    for i, source in enumerate(list_a):
        self.assertIsInstance(a[i], bool)
        self.assertEqual(a[i], bool(source))

    # A list holding only None is accepted once the type is given.
    self.assertIsInstance(
        pv.from_list([None, None, None], pv.VarcharType()), pv.BaseVector
    )

    # An empty list is accepted once the type is given.
    empty_vector = pv.from_list([], pv.IntegerType())
    self.assertIsInstance(empty_vector, pv.BaseVector)
    with self.assertRaises(IndexError):
        _ = empty_vector[0]

    # Conversion not possible from int to varchar
    with self.assertRaises(RuntimeError):
        pv.from_list([0, 1, 3], pv.VarcharType())

    list_b = [0.2, 1.2, 3.23]
    b = pv.from_list(list_b, pv.RealType())
    for i, source in enumerate(list_b):
        # NOTE(review): presumably REAL is stored in single precision, so the
        # round-tripped value is expected to differ from the Python float at
        # 17 places - confirm this is the intent of the assertion.
        self.assertNotAlmostEqual(source, b[i], places=17)

    # dtype as a keyword argument
    integer_vector = pv.from_list([1, 3, 11], dtype=pv.IntegerType())
    self.assertIsInstance(integer_vector, pv.BaseVector)
    self.assertEqual(integer_vector.typeKind().name, "INTEGER")

def test_constant_encoding(self):
ints = pv.constant_vector(1000, 10)
strings = pv.constant_vector("hello", 100)
Expand Down

0 comments on commit abd74d8

Please sign in to comment.