Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 120 additions & 4 deletions src/iceberg/expression/json_serde.cc
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,126 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
}
}

Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) {
// TODO(gangwu): implement type-aware literal parsing equivalent to Java's
// SingleValueParser.fromJson(type, node).
return LiteralFromJson(json);
// Parses a JSON single value into a Literal of the requested type.
//
// Type-aware counterpart of the untyped LiteralFromJson(json) overload, modelled on
// Java's SingleValueParser.fromJson(type, node).
//
// Parameters:
//   json - the JSON node holding the value, optionally wrapped as
//          {"type": "literal", "value": <actual>}.
//   type - the expected type of the literal; when null, parsing falls back to the
//          untyped overload.
//
// Returns the parsed literal. Returns a JSON parse error when the node does not match
// `type` (wrong JSON kind, integer outside the target range, malformed string, fixed
// length mismatch, decimal scale mismatch) and NotSupported for any other type id.
Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
  // If {"type": "literal", "value": <actual>} wrapper is present, unwrap it first.
  // The type member is checked with is_string() before it is read because
  // get<std::string>() throws on any other JSON kind, while this function reports
  // failures through Result.
  if (json.is_object() && json.contains(kType) && json[kType].is_string() &&
      json[kType].get<std::string>() == kLiteral && json.contains(kValue)) {
    return LiteralFromJson(json[kValue], type);
  }
  // If no type context is provided, fall back to untyped parsing.
  if (type == nullptr) return LiteralFromJson(json);

  // Both helpers below must only be called after json.is_number_integer() succeeded.
  //
  // True when the JSON integer fits in int64_t. An unsigned JSON value above the
  // int64_t maximum converts to a negative number, which exposes the wrap-around.
  const auto fits_int64 = [&json] {
    return !json.is_number_unsigned() || json.get<int64_t>() >= 0;
  };
  // True when the JSON integer fits in int32_t (round-trips through a narrowing cast).
  const auto fits_int32 = [&json, &fits_int64] {
    if (!fits_int64()) return false;
    const auto wide = json.get<int64_t>();
    return wide == static_cast<int64_t>(static_cast<int32_t>(wide));
  };

  switch (type->type_id()) {
    case TypeId::kBoolean:
      if (!json.is_boolean()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a boolean value", SafeDumpJson(json));
      return Literal::Boolean(json.get<bool>());

    case TypeId::kInt:
      // Out-of-range integers are rejected instead of being silently truncated.
      if (!json.is_number_integer() || !fits_int32()) [[unlikely]]
        return JsonParseError("Cannot parse {} as an int value", SafeDumpJson(json));
      return Literal::Int(json.get<int32_t>());

    case TypeId::kLong:
      if (!json.is_number_integer() || !fits_int64()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a long value", SafeDumpJson(json));
      return Literal::Long(json.get<int64_t>());

    case TypeId::kFloat:
      if (!json.is_number()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a float value", SafeDumpJson(json));
      return Literal::Float(json.get<float>());

    case TypeId::kDouble:
      if (!json.is_number()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a double value", SafeDumpJson(json));
      return Literal::Double(json.get<double>());

    case TypeId::kString:
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a string value", SafeDumpJson(json));
      return Literal::String(json.get<std::string>());

    // Temporal types (date, time, timestamp, timestamp_tz) accept both an integer and
    // a string representation.
    // NOTE(review): a reviewer recommended dropping the integer form because timezone
    // handling is tricky in C++ and arbitrary integers cannot really be trusted as
    // timestamp values. It is kept here for backward compatibility with existing
    // callers; integers are passed through unchanged after a range check.
    case TypeId::kDate: {
      if (json.is_number_integer()) {
        if (!fits_int32()) [[unlikely]]
          return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
        return Literal::Date(json.get<int32_t>());
      }
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a date value", SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(auto days,
                              TransformUtil::ParseDay(json.get<std::string>()));
      return Literal::Date(days);
    }

    case TypeId::kTime: {
      if (json.is_number_integer()) {
        if (!fits_int64()) [[unlikely]]
          return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
        return Literal::Time(json.get<int64_t>());
      }
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a time value", SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTime(json.get<std::string>()));
      return Literal::Time(micros);
    }

    case TypeId::kTimestamp: {
      if (json.is_number_integer()) {
        if (!fits_int64()) [[unlikely]]
          return JsonParseError("Cannot parse {} as a timestamp value",
                                SafeDumpJson(json));
        return Literal::Timestamp(json.get<int64_t>());
      }
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a timestamp value", SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(auto micros,
                              TransformUtil::ParseTimestamp(json.get<std::string>()));
      return Literal::Timestamp(micros);
    }

    case TypeId::kTimestampTz: {
      if (json.is_number_integer()) {
        if (!fits_int64()) [[unlikely]]
          return JsonParseError("Cannot parse {} as a timestamptz value",
                                SafeDumpJson(json));
        return Literal::TimestampTz(json.get<int64_t>());
      }
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a timestamptz value",
                              SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(
          auto micros, TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
      return Literal::TimestampTz(micros);
    }

    // The remaining cases use the same "reject first, then parse" shape as the
    // primitive cases above. Each one is braced because ICEBERG_ASSIGN_OR_RAISE
    // declares a local that later case labels must not jump over.
    case TypeId::kUuid: {
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a uuid value", SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(json.get<std::string>()));
      return Literal::UUID(uuid);
    }

    case TypeId::kBinary: {
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a binary value", SafeDumpJson(json));
      ICEBERG_ASSIGN_OR_RAISE(auto bytes,
                              StringUtils::HexStringToBytes(json.get<std::string>()));
      return Literal::Binary(std::move(bytes));
    }

    case TypeId::kFixed: {
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a fixed value", SafeDumpJson(json));
      const auto& fixed_type = internal::checked_cast<const FixedType&>(*type);
      const auto hex = json.get<std::string>();
      // Two hex characters encode one byte of the fixed-length value.
      const auto expected_chars = static_cast<size_t>(fixed_type.length()) * 2;
      if (hex.size() != expected_chars) [[unlikely]]
        return JsonParseError("Cannot parse fixed[{}]: expected {} hex chars, got {}",
                              fixed_type.length(), expected_chars, hex.size());
      ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(hex));
      return Literal::Fixed(std::move(bytes));
    }

    case TypeId::kDecimal: {
      if (!json.is_string()) [[unlikely]]
        return JsonParseError("Cannot parse {} as a decimal value", SafeDumpJson(json));
      const auto& dec_type = internal::checked_cast<const DecimalType&>(*type);
      // The scale found in the text must equal the scale of the declared type;
      // otherwise the unscaled value would be reinterpreted with the wrong scale.
      // NOTE(review): assumes Decimal::FromString reports the parsed scale through
      // its optional out-parameter (str, precision*, scale*) - confirm against
      // iceberg/util/decimal.h.
      int32_t parsed_scale = 0;
      ICEBERG_ASSIGN_OR_RAISE(auto dec,
                              Decimal::FromString(json.get<std::string>(),
                                                  /*precision=*/nullptr, &parsed_scale));
      if (parsed_scale != dec_type.scale()) [[unlikely]]
        return JsonParseError(
            "Cannot parse {} as a decimal value: expected scale {}, got {}",
            SafeDumpJson(json), dec_type.scale(), parsed_scale);
      return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
    }

    default:
      return NotSupported("Unsupported type for literal JSON parsing: {}",
                          type->ToString());
  }
}

Result<Literal> LiteralFromJson(const nlohmann::json& json) {
Expand Down
41 changes: 35 additions & 6 deletions src/iceberg/expression/literal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,16 @@
#include <concepts>
#include <cstdint>
#include <string>
#include <vector>

#include "iceberg/type.h"
#include "iceberg/util/checked_cast.h"
#include "iceberg/util/conversions.h"
#include "iceberg/util/decimal.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
#include "iceberg/util/temporal_util.h"
#include "iceberg/util/transform_util.h"

namespace iceberg {

Expand Down Expand Up @@ -193,12 +198,36 @@ Result<Literal> LiteralCaster::CastFromString(
ICEBERG_ASSIGN_OR_RAISE(auto uuid, Uuid::FromString(str_val));
return Literal::UUID(uuid);
}
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return NotImplemented("Cast from String to {} is not implemented yet",
target_type->ToString());
case TypeId::kDate: {
ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
return Literal::Date(days);
}
case TypeId::kTime: {
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
return Literal::Time(micros);
}
case TypeId::kTimestamp: {
ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTimestamp(str_val));
return Literal::Timestamp(micros);
}
case TypeId::kTimestampTz: {
ICEBERG_ASSIGN_OR_RAISE(auto micros,
TransformUtil::ParseTimestampWithZone(str_val));
return Literal::TimestampTz(micros);
}
case TypeId::kBinary: {
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
return Literal::Binary(std::move(bytes));
}
case TypeId::kFixed: {
ICEBERG_ASSIGN_OR_RAISE(auto bytes, StringUtils::HexStringToBytes(str_val));
return Literal::Fixed(std::move(bytes));
}
case TypeId::kDecimal: {
const auto& dec_type = internal::checked_cast<const DecimalType&>(*target_type);
ICEBERG_ASSIGN_OR_RAISE(auto dec, Decimal::FromString(str_val));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as my other comment: we need to check the parsed scale against dec_type.scale().

return Literal::Decimal(dec.value(), dec_type.precision(), dec_type.scale());
}
default:
return NotSupported("Cast from String to {} is not supported",
target_type->ToString());
Expand Down
91 changes: 91 additions & 0 deletions src/iceberg/test/expression_json_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
*/

#include <memory>
#include <optional>
#include <string>
#include <vector>

Expand All @@ -31,6 +32,7 @@
#include "iceberg/expression/literal.h"
#include "iceberg/expression/predicate.h"
#include "iceberg/schema.h"
#include "iceberg/schema_field.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"

Expand Down Expand Up @@ -405,4 +407,93 @@ INSTANTIATE_TEST_SUITE_P(
return info.param.name;
});

// --- LiteralFromJson(json, type) type-aware tests ---

// One positive test case for LiteralFromJson(json, type).
struct LiteralFromJsonTypedParam {
// Case name; returned by the name generator of the test-suite instantiation.
std::string name;
// JSON value handed to LiteralFromJson.
nlohmann::json json;
// Type context handed to LiteralFromJson (passed as a raw pointer via get()).
std::shared_ptr<Type> type;
// Type id the parsed literal is expected to report.
TypeId expected_type_id;
// Expected Literal::ToString() output; the comparison is skipped when empty.
std::optional<std::string> expected_str;
};

// Value-parameterized fixture driven by LiteralFromJsonTypedParam cases.
class LiteralFromJsonTypedTest
: public ::testing::TestWithParam<LiteralFromJsonTypedParam> {};

// Every parameter must parse into a literal whose type id matches the expectation.
// The string form is compared only when the parameter supplies one.
TEST_P(LiteralFromJsonTypedTest, Parses) {
  const auto& param = GetParam();
  ICEBERG_UNWRAP_OR_FAIL(auto literal, LiteralFromJson(param.json, param.type.get()));
  EXPECT_EQ(literal.type()->type_id(), param.expected_type_id);
  if (param.expected_str.has_value()) {
    EXPECT_EQ(literal.ToString(), *param.expected_str);
  }
}

// Positive cases for the type-aware LiteralFromJson overload. Each supported type
// appears at least once; temporal and decimal inputs reuse the string forms that the
// string-cast tests already parse with the same parsing helpers.
INSTANTIATE_TEST_SUITE_P(
    LiteralFromJsonTyped, LiteralFromJsonTypedTest,
    ::testing::Values(
        LiteralFromJsonTypedParam{"Boolean", nlohmann::json(true), boolean(),
                                  TypeId::kBoolean, "true"},
        LiteralFromJsonTypedParam{"Int", nlohmann::json(123), int32(), TypeId::kInt,
                                  "123"},
        LiteralFromJsonTypedParam{"Long", nlohmann::json(9876543210LL), int64(),
                                  TypeId::kLong, "9876543210"},
        LiteralFromJsonTypedParam{"Float", nlohmann::json(1.5), float32(),
                                  TypeId::kFloat, std::nullopt},
        LiteralFromJsonTypedParam{"Double", nlohmann::json(3.14), float64(),
                                  TypeId::kDouble, std::nullopt},
        LiteralFromJsonTypedParam{"String", nlohmann::json("hello"), string(),
                                  TypeId::kString, std::nullopt},
        LiteralFromJsonTypedParam{"DateString", nlohmann::json("2024-01-15"), date(),
                                  TypeId::kDate, std::nullopt},
        LiteralFromJsonTypedParam{"DateOrdinal", nlohmann::json(19738), date(),
                                  TypeId::kDate, std::nullopt},
        LiteralFromJsonTypedParam{"TimeString", nlohmann::json("14:30"), time(),
                                  TypeId::kTime, std::nullopt},
        LiteralFromJsonTypedParam{"TimestampString",
                                  nlohmann::json("2026-01-01T00:00:01.500"),
                                  timestamp(), TypeId::kTimestamp, std::nullopt},
        LiteralFromJsonTypedParam{"TimestampTzString",
                                  nlohmann::json("2026-01-01T00:00:01.500+00:00"),
                                  timestamp_tz(), TypeId::kTimestampTz, std::nullopt},
        LiteralFromJsonTypedParam{
            "Uuid", nlohmann::json("f79c3e09-677c-4bbd-a479-3f349cb785e7"), uuid(),
            TypeId::kUuid, std::nullopt},
        LiteralFromJsonTypedParam{"Binary", nlohmann::json("deadbeef"), binary(),
                                  TypeId::kBinary, std::nullopt},
        LiteralFromJsonTypedParam{"Fixed", nlohmann::json("cafebabe"), fixed(4),
                                  TypeId::kFixed, std::nullopt},
        LiteralFromJsonTypedParam{"Decimal", nlohmann::json("1234.56"), decimal(6, 2),
                                  TypeId::kDecimal, std::nullopt}),
    [](const ::testing::TestParamInfo<LiteralFromJsonTypedParam>& info) {
      return info.param.name;
    });

// One negative test case for LiteralFromJson(json, type).
struct InvalidLiteralFromJsonTypedParam {
// Case name; returned by the name generator of the test-suite instantiation.
std::string name;
// JSON value that LiteralFromJson is expected to reject for `type`.
nlohmann::json json;
// Type context handed to LiteralFromJson (passed as a raw pointer via get()).
std::shared_ptr<Type> type;
};

// Value-parameterized fixture driven by InvalidLiteralFromJsonTypedParam cases.
class InvalidLiteralFromJsonTypedTest
: public ::testing::TestWithParam<InvalidLiteralFromJsonTypedParam> {};

// Every parameter pairs a JSON value with a type it must not parse as; the call is
// expected to come back without a value.
TEST_P(InvalidLiteralFromJsonTypedTest, ReturnsError) {
  const auto& param = GetParam();
  const auto parsed = LiteralFromJson(param.json, param.type.get());
  EXPECT_FALSE(parsed.has_value());
}

// Negative cases for the type-aware LiteralFromJson overload: JSON values of the
// wrong kind for the requested type, plus malformed binary and fixed payloads.
INSTANTIATE_TEST_SUITE_P(
    LiteralFromJsonTyped, InvalidLiteralFromJsonTypedTest,
    ::testing::Values(
        InvalidLiteralFromJsonTypedParam{"BooleanTypeMismatch", nlohmann::json(42),
                                         boolean()},
        InvalidLiteralFromJsonTypedParam{"IntTypeMismatch", nlohmann::json("123"),
                                         int32()},
        InvalidLiteralFromJsonTypedParam{"StringTypeMismatch", nlohmann::json(1),
                                         string()},
        InvalidLiteralFromJsonTypedParam{"DateTypeMismatch", nlohmann::json(true),
                                         date()},
        InvalidLiteralFromJsonTypedParam{"TimestampTypeMismatch", nlohmann::json(true),
                                         timestamp()},
        InvalidLiteralFromJsonTypedParam{"UuidTypeMismatch", nlohmann::json(42),
                                         uuid()},
        InvalidLiteralFromJsonTypedParam{"BinaryInvalidHex", nlohmann::json("xyz"),
                                         binary()},
        InvalidLiteralFromJsonTypedParam{"FixedLengthMismatch",
                                         nlohmann::json("cafe12"), fixed(4)},
        InvalidLiteralFromJsonTypedParam{"DecimalTypeMismatch", nlohmann::json(1.5),
                                         decimal(6, 2)}),
    [](const ::testing::TestParamInfo<InvalidLiteralFromJsonTypedParam>& info) {
      return info.param.name;
    });

// Feeds an equality predicate on a date column, with its value written as a date
// string, to ExpressionFromJson together with the schema. Only checks that parsing
// succeeds and yields a non-null expression; the result is not serialized back.
TEST(LiteralFromJsonTyped, SchemaAwareDatePredicateRoundTrip) {
  const nlohmann::json predicate = {
      {"type", "eq"}, {"term", "event_date"}, {"value", "2024-01-15"}};
  auto date_schema = std::make_shared<Schema>(
      std::vector<SchemaField>{SchemaField::MakeOptional(1, "event_date", date())});

  ICEBERG_UNWRAP_OR_FAIL(auto parsed_expr,
                         ExpressionFromJson(predicate, date_schema.get()));
  ASSERT_NE(parsed_expr, nullptr);
}

} // namespace iceberg
31 changes: 31 additions & 0 deletions src/iceberg/test/literal_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,37 @@ INSTANTIATE_TEST_SUITE_P(
.target_type = uuid(),
.expected_literal = Literal::UUID(
Uuid::FromString("123e4567-e89b-12d3-a456-426614174000").value())},
CastLiteralTestParam{.test_name = "StringToDate",
.source_literal = Literal::String("2024-01-16"),
.target_type = date(),
.expected_literal = Literal::Date(19738)},
CastLiteralTestParam{.test_name = "StringToTime",
.source_literal = Literal::String("14:30"),
.target_type = time(),
.expected_literal = Literal::Time(52200000000LL)},
CastLiteralTestParam{.test_name = "StringToTimestamp",
.source_literal = Literal::String("2026-01-01T00:00:01.500"),
.target_type = timestamp(),
.expected_literal = Literal::Timestamp(1767225601500000L)},
CastLiteralTestParam{
.test_name = "StringToTimestampTz",
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
.target_type = timestamp_tz(),
.expected_literal = Literal::TimestampTz(1767225601500000L)},
CastLiteralTestParam{.test_name = "StringToBinary",
.source_literal = Literal::String("010203FF"),
.target_type = binary(),
.expected_literal = Literal::Binary(std::vector<uint8_t>{
0x01, 0x02, 0x03, 0xFF})},
CastLiteralTestParam{.test_name = "StringToFixed",
.source_literal = Literal::String("01020304"),
.target_type = fixed(4),
.expected_literal = Literal::Fixed(std::vector<uint8_t>{
0x01, 0x02, 0x03, 0x04})},
CastLiteralTestParam{.test_name = "StringToDecimal",
.source_literal = Literal::String("1234.56"),
.target_type = decimal(6, 2),
.expected_literal = Literal::Decimal(123456, 6, 2)},
// Same type cast test
CastLiteralTestParam{.test_name = "IntToInt",
.source_literal = Literal::Int(42),
Expand Down
Loading
Loading