Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 48 additions & 4 deletions src/backend/utils/adt/agtype.c
Original file line number Diff line number Diff line change
Expand Up @@ -4871,6 +4871,22 @@ Datum agtype_hash_cmp(PG_FUNCTION_ARGS)
agtype_iterator_token tok;
agtype_value *r;
uint64 seed = 0xF0F0F0F0;
/*
* Stack of "is the current open array a raw_scalar pseudo-array?" so
* that the WAGT_END_ARRAY case can match the choice we made at
* WAGT_BEGIN_ARRAY. The iterator does not populate *val on END tokens
* (see comment above agtype_iterator_next), so r->val.array.raw_scalar
* is uninitialized at that point and reading it as a bool is undefined
* behavior (UBSan flagged values like 127 here).
*
* Arrays in agtype are bounded by AGTYPE_CONTAINER_SIZE which fits in
* AGT_CMASK; a fixed-size stack of 64 levels is far more than any real
* agtype graph value will ever produce. Bail out with an error if we
* exceed it rather than silently corrupting the hash.
*/
#define HASH_RAW_SCALAR_STACK_DEPTH 64
bool raw_scalar_stack[HASH_RAW_SCALAR_STACK_DEPTH];
int raw_scalar_top = -1;

/* this function returns INTEGER which is 32 bits */
if (PG_ARGISNULL(0))
Expand All @@ -4887,18 +4903,46 @@ Datum agtype_hash_cmp(PG_FUNCTION_ARGS)
{
if (IS_A_AGTYPE_SCALAR(r) && AGTYPE_ITERATOR_TOKEN_IS_HASHABLE(tok))
agtype_hash_scalar_value_extended(r, &hash, seed);
else if (tok == WAGT_BEGIN_ARRAY && !r->val.array.raw_scalar)
seed = LEFT_ROTATE(seed, 4);
else if (tok == WAGT_BEGIN_ARRAY)
{
/* push raw_scalar state for the matching END token */
raw_scalar_top++;
if (raw_scalar_top >= HASH_RAW_SCALAR_STACK_DEPTH)
{
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("agtype_hash_cmp: array nesting depth exceeds %d",
HASH_RAW_SCALAR_STACK_DEPTH)));
}
raw_scalar_stack[raw_scalar_top] = r->val.array.raw_scalar;
if (!r->val.array.raw_scalar)
{
seed = LEFT_ROTATE(seed, 4);
}
}
else if (tok == WAGT_BEGIN_OBJECT)
seed = LEFT_ROTATE(seed, 6);
else if (tok == WAGT_END_ARRAY && !r->val.array.raw_scalar)
seed = RIGHT_ROTATE(seed, 4);
else if (tok == WAGT_END_ARRAY)
{
bool was_raw_scalar;

/* pop matching BEGIN's raw_scalar state */
Assert(raw_scalar_top >= 0);
was_raw_scalar = raw_scalar_stack[raw_scalar_top];
raw_scalar_top--;
if (!was_raw_scalar)
{
seed = RIGHT_ROTATE(seed, 4);
}
}
else if (tok == WAGT_END_OBJECT)
seed = RIGHT_ROTATE(seed, 4);

seed = LEFT_ROTATE(seed, 1);
}

#undef HASH_RAW_SCALAR_STACK_DEPTH

pfree_if_not_null(r);
PG_FREE_IF_COPY(agt, 0);

Expand Down
21 changes: 17 additions & 4 deletions src/backend/utils/adt/agtype_ext.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,14 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry,
/* copy in the int_value data */
numlen = sizeof(int64);
offset = reserve_from_buffer(buffer, numlen);
*((int64 *)(buffer->data + offset)) = scalar_val->val.int_value;
/*
* Use memcpy because the AGT_HEADER (4 bytes) leaves the buffer
* write position 4-byte-aligned but not necessarily 8-byte-aligned.
* Direct *((int64 *) ...) = ... is undefined behavior under strict
* alignment rules (UBSan flags it; works in practice on x86_64).
*/
memcpy(buffer->data + offset, &scalar_val->val.int_value,
sizeof(int64));

*agtentry = AGTENTRY_IS_AGTYPE | (padlen + numlen + AGT_HEADER_SIZE);
break;
Expand All @@ -68,7 +75,8 @@ bool ag_serialize_extended_type(StringInfo buffer, agtentry *agtentry,
/* copy in the float_value data */
numlen = sizeof(scalar_val->val.float_value);
offset = reserve_from_buffer(buffer, numlen);
*((float8 *)(buffer->data + offset)) = scalar_val->val.float_value;
memcpy(buffer->data + offset, &scalar_val->val.float_value,
sizeof(float8));

*agtentry = AGTENTRY_IS_AGTYPE | (padlen + numlen + AGT_HEADER_SIZE);
break;
Expand Down Expand Up @@ -154,12 +162,17 @@ void ag_deserialize_extended_type(char *base_addr, uint32 offset,
{
case AGT_HEADER_INTEGER:
result->type = AGTV_INTEGER;
result->val.int_value = *((int64 *)(base + AGT_HEADER_SIZE));
/* See comment in ag_serialize_extended_type. The 4-byte AGT_HEADER
* leaves the int64 at a 4-byte-aligned but not 8-byte-aligned
* address, so a direct typed load is undefined behavior. */
memcpy(&result->val.int_value, base + AGT_HEADER_SIZE,
sizeof(int64));
break;

case AGT_HEADER_FLOAT:
result->type = AGTV_FLOAT;
result->val.float_value = *((float8 *)(base + AGT_HEADER_SIZE));
memcpy(&result->val.float_value, base + AGT_HEADER_SIZE,
sizeof(float8));
break;

case AGT_HEADER_VERTEX:
Expand Down
28 changes: 23 additions & 5 deletions src/backend/utils/adt/agtype_raw.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,20 @@ void write_graphid(agtype_build_state *bstate, graphid graphid)
write_const(AGT_HEADER_INTEGER, AGT_HEADER_TYPE);
length += AGT_HEADER_SIZE;

/* graphid value */
write_const(graphid, int64);
length += sizeof(int64);
/*
* graphid value (int64). The 4-byte AGT_HEADER above leaves the buffer
* write position 4-byte-aligned but not 8-byte-aligned, so the typed
* write done by write_const(graphid, int64) is undefined behavior under
* strict alignment rules. Use memcpy. (UBSan flags the typed write as
* "store to misaligned address ... requires 8 byte alignment".)
*/
{
int numlen = sizeof(int64);
int g_offset = BUFFER_RESERVE(numlen);

memcpy(bstate->buffer->data + g_offset, &graphid, sizeof(int64));
length += numlen;
}

/* agtentry */
write_agt(AGTENTRY_IS_AGTYPE | length);
Expand All @@ -214,8 +225,15 @@ void write_container(agtype_build_state *bstate, agtype *agtype)
/* padding */
length += BUFFER_WRITE_PAD();

/* varlen data */
length += write_ptr((char *) &agtype->root, VARSIZE(agtype));
/*
* Copy the inner agtype_container only, NOT the outer varlena header.
* VARSIZE(agtype) reports the total varlena size (including the 4-byte
* vl_len_ header), but we are starting our copy at &agtype->root, which
* is already past that header. Subtracting VARHDRSZ avoids reading
* VARHDRSZ bytes past the source allocation (caught by ASan as
* heap-buffer-overflow in __interceptor_memcpy from write_pointer).
*/
length += write_ptr((char *) &agtype->root, VARSIZE(agtype) - VARHDRSZ);

/* agtentry */
write_agt(AGTENTRY_IS_CONTAINER | length);
Expand Down
Loading
Loading