Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/include/firebird/impl/msg/jrd.h
Original file line number Diff line number Diff line change
Expand Up @@ -1020,3 +1020,4 @@ FB_IMPL_MSG(JRD, 1017, dsql_agg_non_agg_context, -104, "42", "000", "Aggregate f
FB_IMPL_MSG(JRD, 1018, dsql_agg_param_not_accum, -204, "42", "000", "Aggregate function input parameters may be referenced only in ON ACCUMULATE DO")
FB_IMPL_MSG(JRD, 1019, dsql_agg_exit_group, -204, "42", "000", "EXIT is not allowed in ON GROUP DO section of aggregate function")
FB_IMPL_MSG(JRD, 1020, dsql_agg_return, -204, "42", "000", "RETURN is not allowed in ON START DO, ON ACCUMULATE DO or ON FINISH DO sections of aggregate function; use EXIT instead")
FB_IMPL_MSG(JRD, 1021, blob_out_of_length_write, -204, "42", "000", "Cannot write to blob. Position @1 is out of blob length @2")

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blob_write_out_of_bounds or blob_write_after_the_end ?

Same for the message text.

1 change: 1 addition & 0 deletions src/include/gen/Firebird.pas
Original file line number Diff line number Diff line change
Expand Up @@ -6062,6 +6062,7 @@ IPerformanceStatsImpl = class(IPerformanceStats)
isc_dsql_agg_param_not_accum = 335545338;
isc_dsql_agg_exit_group = 335545339;
isc_dsql_agg_return = 335545340;
isc_blob_out_of_length_write = 335545341;
isc_gfix_db_name = 335740929;
isc_gfix_invalid_sw = 335740930;
isc_gfix_incmp_sw = 335740932;
Expand Down
246 changes: 246 additions & 0 deletions src/jrd/blb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,82 @@ static void move_to_string(Jrd::thread_db*, dsc*, dsc*);
static void slice_callback(array_slice*, ULONG, dsc*);
static blb* store_array(thread_db*, jrd_tra*, bid*);

namespace {

// A helper class to track positions of buffer, pages and modifications
class DataModifyHelper
{
public:
DataModifyHelper(thread_db* tdbb, const vcl* blb_pages, const offset_t position, const void* buffer, const ULONG length) :
m_newData(buffer), m_newLength(length),
m_level1Pages(*blb_pages),
m_pageDataLength(tdbb->getDatabase()->dbb_page_size - BLP_SIZE)
{
m_level1PageId = position / m_pageDataLength; // Number of used pages
m_offset = position % m_pageDataLength; // Position in the page
}

// Get data from blob data page and replace data on it

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Get data ?

inline void replaceInPage(blob_page* page) noexcept
{
fb_assert(needWrite());

UCHAR* data = reinterpret_cast<UCHAR*>(page->blp_page);
const ULONG dataLength = std::min<ULONG>(m_pageDataLength - m_offset, m_newLength - m_written);
fb_assert(dataLength <= m_pageDataLength);

memcpy(data + m_offset, reinterpret_cast<const UCHAR*>(m_newData) + m_written, dataLength);
m_written += dataLength;
m_offset = 0; // Offset only in the first page
};

// Move child page Id from level 1 to level 2

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is completely unclear.
There is no way to "move" "page Id" (which "Id" ???) between "levels".

inline ULONG setLevel2(const USHORT pagesOnRootPage)
{
const auto pageId = m_level1PageId;
m_level1PageId = pageId / pagesOnRootPage; // 100000 / 8000 = 12 // level1 page number
return pageId % pagesOnRootPage; // 100000 % 8000 = 4000 // level2 page number
}

// Get level 1 or level 2 page
inline ULONG getNextLevel1PageId() noexcept

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unclear as above.

{
return m_level1Pages[m_level1PageId++];
}

// Pages are over, write to buffer

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean - pointer pages ? Data pages ? Anything else ?

inline bool hasPages() const noexcept
{
return m_level1PageId < m_level1Pages.count();
}

inline bool needWrite() const noexcept
{
return m_written < m_newLength;
}

[[maybe_unused]]
inline ULONG getWrittenLength() const noexcept
{
return m_written;
}

private:
// Where to replace
offset_t m_offset = 0;

// New Data
const void* m_newData;
const ULONG m_newLength;

ULONG m_level1PageId = 0;
const vcl& m_level1Pages;
const USHORT m_pageDataLength;

ULONG m_written = 0;
};

}

void blb::BLB_cancel(thread_db* tdbb)
{
Expand Down Expand Up @@ -1573,6 +1649,21 @@ void blb::BLB_put_data(thread_db* tdbb, const UCHAR* buffer, SLONG length)
SET_TDBB(tdbb);
const BLOB_PTR* p = buffer;

// BLB_put_segment will remove the flag after the first call so replace the data here

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

teh -> the

if (blb_flags & BLB_seek)
{
if (!(blb_flags & BLB_temporary) || (blb_flags & BLB_closed))
ERR_post(Arg::Gds(isc_cannot_update_old_blob));

blb_flags &= ~BLB_seek;

// Modify part inside existing data
if (modifyDataMoveBuffer(tdbb, blb_seek, p, length))
return;

// Continue and append the rest
}

while (length > 0)
{
// ASF: the comment below was copied from BLB_get_data
Expand Down Expand Up @@ -1607,6 +1698,17 @@ void blb::BLB_put_segment(thread_db* tdbb, const void* seg, USHORT segment_lengt
if (!(blb_flags & BLB_temporary) || (blb_flags & BLB_closed))
ERR_post(Arg::Gds(isc_cannot_update_old_blob));

if (blb_flags & BLB_seek)
{
blb_flags &= ~BLB_seek;

// Modify part inside existing data
if (modifyDataMoveBuffer(tdbb, blb_seek, segment, segment_length))
return;

// Continue and append the rest
}

if (blb_filter)
{
BLF_put_segment(tdbb, &blb_filter, segment_length, segment);
Expand Down Expand Up @@ -1944,6 +2046,131 @@ void blb::scalar(thread_db* tdbb,
blob->BLB_close(tdbb);
}

void blb::modifyExistingData(thread_db* tdbb, offset_t position, const void* buffer, const ULONG length)
{
	// Overwrite `length` bytes of already written blob data, starting at byte `position`.
	// The blob must be new (temporary) and not closed, and the whole range must lie
	// inside the existing data; both conditions are asserted, not reported as errors.
	//
	// Where the bytes to overwrite are located depends on the blob level:
	//
	// Level 0: all data is in the blob buffer. A single memcpy is enough.
	//
	// Level 1: blb_pages holds data page numbers. Data not yet flushed to a data page
	//          is in the blob buffer.
	//   1. Fetch the data page containing `position`, mark it, overwrite, release.
	//   2. Proceed with the following data pages while new data remains.
	//   3. When the data pages are exhausted, the rest goes into the blob buffer.
	//
	// Level 2: blb_pages holds pointer page numbers, each pointer page holds data page numbers.
	//   1. Data page sequence number: position / <data bytes per page>.
	//   2. Pointer page: sequence / blb_pointers, slot on it: sequence % blb_pointers.
	//   3. Byte offset on the first data page: position % <data bytes per page>.
	//   4. Walk the slots of the pointer page, overwriting one data page per slot.
	//   5. When the pointer page is exhausted, continue with slot 0 of the next one.
	//   6. When the pointer pages are exhausted, the rest goes into the blob buffer.

	fb_assert((blb_flags & BLB_temporary) && !(blb_flags & BLB_closed));	// Can update only new blob
	fb_assert(position + length <= blb_length);	// Update only existing data

	if (blb_level == 0)	// No pages, just the buffer
	{
		blob_page* const bufferPage = reinterpret_cast<blob_page*>(getBuffer());
		memcpy(reinterpret_cast<char*>(bufferPage->blp_page) + position, buffer, length);
		return;
	}

	DataModifyHelper helper(tdbb, blb_pages, position, buffer, length);

	WIN window(blb_pg_space_id, -1);
	if (blb_flags & BLB_large_scan)
	{
		window.win_flags = WIN_large_scan;
		window.win_scans = 1;
	}

	// Release whatever page the window currently holds
	auto releasePage = [&]()
	{
		if (window.win_flags & WIN_large_scan)
			CCH_RELEASE_TAIL(tdbb, &window);
		else
			CCH_RELEASE(tdbb, &window);
	};

	// Everything past the last page is still in the blob buffer; this is always the final chunk
	auto writeToBuffer = [&]()
	{
		helper.replaceInPage(reinterpret_cast<blob_page*>(getBuffer()));
		fb_assert(helper.getWrittenLength() == length);
	};

	if (blb_level == 1)
	{
		// blb_pages entries are data page numbers
		while (helper.needWrite())
		{
			if (!helper.hasPages())
			{
				writeToBuffer();
				return;
			}

			window.win_page = helper.getNextLevel1PageId();
			blob_page* const dataPage =
				reinterpret_cast<blob_page*>(CCH_FETCH(tdbb, &window, LCK_write, pag_blob));

			CCH_MARK(tdbb, &window);	// A page must be marked before its contents are changed
			helper.replaceInPage(dataPage);
			releasePage();
		}
	}
	else
	{
		// blb_pages entries are pointer page numbers, pointer pages hold data page numbers
		ULONG firstSlot = helper.setLevel2(blb_pointers);

		while (helper.needWrite())
		{
			if (!helper.hasPages())
			{
				writeToBuffer();
				return;
			}

			const ULONG pointerPageNumber = helper.getNextLevel1PageId();

			ULONG slot = firstSlot;
			ULONG slotCount = 0;
			do
			{
				// CCH_HANDOFF gives up the pointer page in exchange for the data page,
				// so the pointer page is fetched anew for every slot and never read
				// after the handoff.
				window.win_page = pointerPageNumber;
				const blob_page* const pointerPage =
					reinterpret_cast<const blob_page*>(CCH_FETCH(tdbb, &window, LCK_read, pag_blob));
				slotCount = pointerPage->blp_length / sizeof(pointerPage->blp_page[0]);

				if (slot >= slotCount)
				{
					// Nothing (more) to modify under this pointer page
					releasePage();
					break;
				}

				blob_page* const dataPage = reinterpret_cast<blob_page*>(
					CCH_HANDOFF(tdbb, &window, pointerPage->blp_page[slot], LCK_write, pag_blob));

				CCH_MARK(tdbb, &window);	// Mark before changing the contents
				helper.replaceInPage(dataPage);
				releasePage();

				++slot;
			} while (slot < slotCount && helper.needWrite());

			firstSlot = 0;	// Only the first pointer page is entered at a non-zero slot
		}
	}

	fb_assert(helper.getWrittenLength() == length);
}

static ArrayField* alloc_array(jrd_tra* transaction, Ods::InternalArrayDesc* proto_desc)
{
Expand Down Expand Up @@ -3033,3 +3260,22 @@ void blb::BLB_cancel()
{
BLB_cancel(JRD_get_thread_data());
}

FB_SIZE_T blb::BLB_read(thread_db* tdbb, const offset_t position, void* buffer, const ULONG length)
{
	// Seek to the absolute position (mode 0 - from start), then read from there.
	// NOTE(review): BLB_lseek's offset parameter type is not visible here - confirm that
	// a 64-bit `position` is not truncated.
	constexpr USHORT seekFromStart = 0;
	BLB_lseek(seekFromStart, position);

	UCHAR* const destination = static_cast<UCHAR*>(buffer);
	return BLB_get_data(tdbb, destination, length, false);
}

void blb::BLB_write(thread_db* tdbb, const offset_t position, const void* buffer, ULONG length)
{
	// Only a new (temporary) blob that is not closed yet may be written to
	const bool writable = (blb_flags & BLB_temporary) && !(blb_flags & BLB_closed);
	if (!writable)
		ERR_post(Arg::Gds(isc_cannot_update_old_blob));	// Cannot update existing blob

	// Overwrite the part of the range that lies inside existing data.
	// When the range extends past the end, `buffer` and `length` are advanced
	// so that they describe only the part still to be appended.
	const bool fullyInside = modifyDataMoveBuffer(tdbb, position, buffer, length);

	if (!fullyInside)
		BLB_put_data(tdbb, static_cast<const UCHAR*>(buffer), length);	// Append
}
44 changes: 44 additions & 0 deletions src/jrd/blb.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "../common/classes/ImplementHelper.h"
#include "../common/dsc.h"
#include "../jrd/Resources.h"
#include "err_proto.h"

namespace Ods
{
Expand Down Expand Up @@ -133,6 +134,14 @@ class blb : public pool_alloc<type_blb>
return destination;
}

// Read up to `length` bytes into `buffer`, starting at byte `position` counted from the start of the blob.
// Returns the value reported by BLB_get_data.
FB_SIZE_T BLB_read(thread_db* tdbb, const offset_t position, void* buffer, const ULONG length);

// Write data at any position in a temporary (new) blob.
// `position` must not be beyond the current blob length, otherwise isc_blob_out_of_length_write is posted.
// The data itself may extend past the end of the blob; that part is appended.
// Existing data in the range is overwritten.
// Posts isc_cannot_update_old_blob if the blob is not temporary or is already closed.
void BLB_write(thread_db* tdbb, const offset_t position, const void* buffer, ULONG length);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these two functions supposed to be a public API ? I see no changes in interfaces.
If not - it should not start with BLB_


private:
static blb* allocate_blob(thread_db*, jrd_tra*);
static blb* copy_blob(thread_db* tdbb, const bid* source, bid* destination,
Expand All @@ -142,6 +151,41 @@ class blb : public pool_alloc<type_blb>
void insert_page(thread_db*);
void destroy(const bool purge_flag);

// Overwrite `length` bytes of already written blob data, starting at byte `position`.
// The range must lie entirely inside the existing data and the blob must be new and not closed;
// the implementation checks this with fb_assert only, so callers must validate the range themselves.
void modifyExistingData(thread_db* tdbb, offset_t position, const void* buffer, const ULONG length);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, did I miss a way to modify non-existing data? :)


// Overwrite the part of the range [position, position + length) that lies inside existing blob data.
// Posts isc_blob_out_of_length_write when `position` is beyond the current blob length.
// Output:
//   true:  nothing is left to append - the range was entirely inside the blob data,
//          or `length` was negative (treated as "nothing to write")
//   false: the range extends beyond existing data. `buffer` and `length` are advanced
//          so that they describe only the data not written yet
template<class BufferType, class SizeType>
requires((std::is_same_v<BufferType, void> || std::is_same_v<BufferType, UCHAR>) && std::is_integral_v<SizeType>)
bool modifyDataMoveBuffer(thread_db* tdbb, const offset_t position, const BufferType*& buffer, SizeType& length)
{
	if (position > blb_length)
	{
		ERR_post(Firebird::Arg::Gds(isc_blob_out_of_length_write) <<
			Firebird::Arg::Int64(position) << Firebird::Arg::Int64(blb_length));
	}

	// A negative length converted to an unsigned type would turn into a huge byte count
	if constexpr (std::is_signed_v<SizeType>)
	{
		if (length < 0)
			return true;
	}

	// Compare against the room left after `position` instead of computing position + length,
	// which could overflow
	const offset_t requested = static_cast<offset_t>(length);
	const offset_t available = blb_length - position;

	if (requested <= available)
	{
		// Range is inside the current data, replace and report that no extra actions are required
		modifyExistingData(tdbb, position, buffer, static_cast<ULONG>(requested));
		return true;
	}

	// Part inside existing data
	modifyExistingData(tdbb, position, buffer, static_cast<ULONG>(available));

	// Return only the part to append
	buffer = reinterpret_cast<const BufferType*>(reinterpret_cast<const UCHAR*>(buffer) + available);	// Move pointer
	length = static_cast<SizeType>(requested - available);
	return false;
}

FB_SIZE_T blb_temp_size = 0; // size stored in transaction temp space
offset_t blb_temp_offset = 0; // offset in transaction temp space
Attachment* blb_attachment = nullptr; // database attachment
Expand Down
Loading
Loading