-
-
Notifications
You must be signed in to change notification settings - Fork 268
Add BLB_write function to allow modification of data in a BLOB #9066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
dfb5435
c9426de
9d2bb63
3917e5b
d06480b
d632219
52b1844
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,6 +90,82 @@ static void move_to_string(Jrd::thread_db*, dsc*, dsc*); | |
| static void slice_callback(array_slice*, ULONG, dsc*); | ||
| static blb* store_array(thread_db*, jrd_tra*, bid*); | ||
|
|
||
| namespace { | ||
|
|
||
| // A helper class to track positions of buffer, pages and modifications | ||
| class DataModifyHelper | ||
| { | ||
| public: | ||
| DataModifyHelper(thread_db* tdbb, const vcl* blb_pages, const offset_t position, const void* buffer, const ULONG length) : | ||
| m_newData(buffer), m_newLength(length), | ||
| m_level1Pages(*blb_pages), | ||
| m_pageDataLength(tdbb->getDatabase()->dbb_page_size - BLP_SIZE) | ||
| { | ||
| m_level1PageId = position / m_pageDataLength; // Number of used pages | ||
| m_offset = position % m_pageDataLength; // Position in the page | ||
| } | ||
|
|
||
| // Get data from blob data page and replace data on it | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Get data ? |
||
| inline void replaceInPage(blob_page* page) noexcept | ||
| { | ||
| fb_assert(needWrite()); | ||
|
|
||
| UCHAR* data = reinterpret_cast<UCHAR*>(page->blp_page); | ||
| const ULONG dataLength = std::min<ULONG>(m_pageDataLength - m_offset, m_newLength - m_written); | ||
| fb_assert(dataLength <= m_pageDataLength); | ||
|
|
||
| memcpy(data + m_offset, reinterpret_cast<const UCHAR*>(m_newData) + m_written, dataLength); | ||
| m_written += dataLength; | ||
| m_offset = 0; // Offset only in the first page | ||
| }; | ||
|
|
||
| // Move child page Id from level 1 to level 2 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is completely unclear. |
||
| inline ULONG setLevel2(const USHORT pagesOnRootPage) | ||
| { | ||
| const auto pageId = m_level1PageId; | ||
| m_level1PageId = pageId / pagesOnRootPage; // 100000 / 8000 = 12 // level1 page number | ||
| return pageId % pagesOnRootPage; // 100000 % 8000 = 4000 // level2 page number | ||
| } | ||
|
|
||
| // Get level 1 or level 2 page | ||
| inline ULONG getNextLevel1PageId() noexcept | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unclear as above. |
||
| { | ||
| return m_level1Pages[m_level1PageId++]; | ||
| } | ||
|
|
||
| // Pages are over, write to buffer | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean - pointer pages ? Data pages ? Anything else ? |
||
| inline bool hasPages() const noexcept | ||
| { | ||
| return m_level1PageId < m_level1Pages.count(); | ||
| } | ||
|
|
||
| inline bool needWrite() const noexcept | ||
| { | ||
| return m_written < m_newLength; | ||
| } | ||
|
|
||
| [[maybe_unused]] | ||
| inline ULONG getWrittenLength() const noexcept | ||
| { | ||
| return m_written; | ||
| } | ||
|
|
||
| private: | ||
| // Where to replace | ||
| offset_t m_offset = 0; | ||
|
|
||
| // New Data | ||
| const void* m_newData; | ||
| const ULONG m_newLength; | ||
|
|
||
| ULONG m_level1PageId = 0; | ||
| const vcl& m_level1Pages; | ||
| const USHORT m_pageDataLength; | ||
|
|
||
| ULONG m_written = 0; | ||
| }; | ||
|
|
||
| } | ||
|
|
||
| void blb::BLB_cancel(thread_db* tdbb) | ||
| { | ||
|
|
@@ -1573,6 +1649,21 @@ void blb::BLB_put_data(thread_db* tdbb, const UCHAR* buffer, SLONG length) | |
| SET_TDBB(tdbb); | ||
| const BLOB_PTR* p = buffer; | ||
|
|
||
| // BLB_put_segment will remove the flag after teh first call so replace the data here | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. teh -> the |
||
| if (blb_flags & BLB_seek) | ||
| { | ||
| if (!(blb_flags & BLB_temporary) || (blb_flags & BLB_closed)) | ||
| ERR_post(Arg::Gds(isc_cannot_update_old_blob)); | ||
|
|
||
| blb_flags &= ~BLB_seek; | ||
|
|
||
| // Modify part inside existing data | ||
| if (modifyDataMoveBuffer(tdbb, blb_seek, p, length)) | ||
| return; | ||
|
|
||
| // Continue and append the rest | ||
| } | ||
|
|
||
| while (length > 0) | ||
| { | ||
| // ASF: the comment below was copied from BLB_get_data | ||
|
|
@@ -1607,6 +1698,17 @@ void blb::BLB_put_segment(thread_db* tdbb, const void* seg, USHORT segment_lengt | |
| if (!(blb_flags & BLB_temporary) || (blb_flags & BLB_closed)) | ||
| ERR_post(Arg::Gds(isc_cannot_update_old_blob)); | ||
|
|
||
| if (blb_flags & BLB_seek) | ||
| { | ||
| blb_flags &= ~BLB_seek; | ||
|
|
||
| // Modify part inside existing data | ||
| if (modifyDataMoveBuffer(tdbb, blb_seek, segment, segment_length)) | ||
| return; | ||
|
|
||
| // Continue and append the rest | ||
| } | ||
|
|
||
| if (blb_filter) | ||
| { | ||
| BLF_put_segment(tdbb, &blb_filter, segment_length, segment); | ||
|
|
@@ -1944,6 +2046,131 @@ void blb::scalar(thread_db* tdbb, | |
| blob->BLB_close(tdbb); | ||
| } | ||
|
|
||
| void blb::modifyExistingData(thread_db* tdbb, offset_t position, const void* buffer, const ULONG length) | ||
| { | ||
| // All BLOB data is stored in the following format: <pages> <buffer> | ||
| // | ||
| // <buffer> contains unflushed data and is easy to modify. | ||
| // <pages> must be fetched, modified, marked, and released. | ||
| // | ||
| // Depending on the level, the algorithm works as follows: | ||
| // | ||
| // Level 0: All data is inside blb_buffer. | ||
| // This is the simplest case: just perform a memcpy, and we're done. | ||
| // | ||
| // Level 1: Flushed data is located on pages (blb_pages), unflushed data is in blb_buffer. | ||
| // To modify the data: | ||
| // 1. Find the first page that needs modification, read, mark and release it. | ||
| // 2. If the remaining data to modify exceeds the current page size, proceed to the next page. | ||
| // 3. If there are no more pages but there is still data to modify, update the <buffer>. | ||
| // | ||
| // Level 2: Flushed data is organized in a pages tree. | ||
| // - The blb_pages array contains level 1 pages. | ||
| // - Each level 1 page holds a list of level 2 page IDs. | ||
| // | ||
| // To locate and modify the required page: | ||
| // 1. Calculate the page offset: OFFSET = position / <page size>. | ||
| // 2. Determine the target level 1 page: FIRST = OFFSET / <number of IDs per page>; the level 2 page ID is at index OFFSET % <number of IDs per page> on that page. | ||
| // 3. Compute the byte offset within the level 2 page: | ||
| // BytesOffset = position % <page size>. | ||
| // 4. Modify the first relevant level 2 page, then move to the next one. | ||
| // 5. If no more level 2 pages are available, advance to the next level 1 page, | ||
| // read its first level 2 page, and continue modifying subsequent level 2 pages. | ||
| // 6. If all pages have been processed but there is still unmodified data, update the <buffer>. | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, but.. where you get the terminology you used ? I'm sorry, but it is very, very hard to read - I need constantly translate into familiar terms. There is no level of blob pages. There is level of blob. There are blob pointer pages and blob data pages, and blob record contains blob data (for level-0 blobs) or array of pointers (for non-0 level blobs). |
||
|
|
||
| fb_assert ((blb_flags & BLB_temporary) && !(blb_flags & BLB_closed)); // Can update only new blob | ||
| fb_assert(position + length <= blb_length); // Update only existing data | ||
|
|
||
| if (blb_level == 0) // No pages, just a buffer | ||
| { | ||
| blob_page* page = (blob_page*) getBuffer(); | ||
| memcpy(reinterpret_cast<char*>(page->blp_page) + position, buffer, length); | ||
| return; | ||
| } | ||
|
|
||
| // Use helper to simplify pages modification | ||
| DataModifyHelper helper(tdbb, blb_pages, position, buffer, length); | ||
| blob_page* page = nullptr; | ||
|
|
||
| WIN window(blb_pg_space_id, -1); | ||
| if (blb_flags & BLB_large_scan) | ||
| { | ||
| window.win_flags = WIN_large_scan; | ||
| window.win_scans = 1; | ||
| } | ||
|
|
||
| auto releasePage = [&tdbb, &window](const bool mark) | ||
| { | ||
| if (mark) | ||
| CCH_MARK(tdbb, &window); // Mark as dirty | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Page must be marked before any attempt to change the contents! |
||
|
|
||
| if (window.win_flags & WIN_large_scan) | ||
| CCH_RELEASE_TAIL(tdbb, &window); | ||
| else | ||
| CCH_RELEASE(tdbb, &window); | ||
| }; | ||
|
|
||
| // Level 1 blobs are much easier -- page number is in vector. | ||
| if (blb_level == 1) | ||
| { | ||
| // Level1 pages are pointing to data | ||
|
|
||
| // Update data on pages one by one | ||
| while (helper.needWrite()) | ||
| { | ||
| if (!helper.hasPages()) // The last data chunk is in the blb_buffer | ||
| { | ||
| page = reinterpret_cast<blob_page*>(getBuffer()); | ||
| helper.replaceInPage(page); | ||
| fb_assert(helper.getWrittenLength() == length); | ||
| return; | ||
| } | ||
|
|
||
| // Level 1 page contains data | ||
| window.win_page = helper.getNextLevel1PageId(); | ||
| page = reinterpret_cast<blob_page*>(CCH_FETCH(tdbb, &window, LCK_write, pag_blob)); | ||
| helper.replaceInPage(page); | ||
| releasePage(true); | ||
| } | ||
| } | ||
| else | ||
| { | ||
| // Level1 pages are pointing to Level2 pages | ||
| // Level2 pages are pointing to data | ||
|
|
||
| auto level2pageOffset = helper.setLevel2(blb_pointers); | ||
| while (helper.needWrite()) | ||
| { | ||
| if (!helper.hasPages()) // The last data is in the blb_buffer | ||
| { | ||
| helper.replaceInPage(page); | ||
| fb_assert(helper.getWrittenLength() == length); | ||
| return; | ||
| } | ||
|
|
||
| // Level 1 page contains pointers | ||
| window.win_page = helper.getNextLevel1PageId(); | ||
| page = reinterpret_cast<blob_page*>(CCH_FETCH(tdbb, &window, LCK_write, pag_blob)); | ||
|
|
||
| // Level 2 pages contain data. Update them one by one | ||
| const auto numberOfPagess = page->blp_length / sizeof(page->blp_page); | ||
| for (FB_SIZE_T i = level2pageOffset; i < numberOfPagess && helper.needWrite(); ++i) | ||
| { | ||
| auto level2Page = reinterpret_cast<blob_page*>(CCH_HANDOFF(tdbb, &window, | ||
| page->blp_page[i], | ||
| LCK_write, pag_blob)); | ||
|
|
||
| helper.replaceInPage(level2Page); | ||
| CCH_MARK(tdbb, &window); | ||
| } | ||
| releasePage(false); | ||
|
|
||
| level2pageOffset = 0; // Offset only for the first pages | ||
| } | ||
| } | ||
| fb_assert(helper.getWrittenLength() == length); | ||
| } | ||
|
|
||
| static ArrayField* alloc_array(jrd_tra* transaction, Ods::InternalArrayDesc* proto_desc) | ||
| { | ||
|
|
@@ -3033,3 +3260,22 @@ void blb::BLB_cancel() | |
| { | ||
| BLB_cancel(JRD_get_thread_data()); | ||
| } | ||
|
|
||
| FB_SIZE_T blb::BLB_read(thread_db* tdbb, const offset_t position, void* buffer, const ULONG length) | ||
| { | ||
| // Mode 0 - from start | ||
| BLB_lseek(0, position); | ||
| return BLB_get_data(tdbb, reinterpret_cast<UCHAR*>(buffer), length, false); | ||
| } | ||
|
|
||
| void blb::BLB_write(thread_db* tdbb, const offset_t position, const void* buffer, ULONG length) | ||
| { | ||
| if (!(blb_flags & BLB_temporary) || (blb_flags & BLB_closed)) | ||
| ERR_post(Arg::Gds(isc_cannot_update_old_blob)); // Cannot update existing blob | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This check is duplicated many times, worth to move it in to separate routine. |
||
|
|
||
| // Modify part inside existing data | ||
| if (modifyDataMoveBuffer(tdbb, position, buffer, length)) | ||
| return; // Only modify, exit | ||
|
|
||
| BLB_put_data(tdbb, reinterpret_cast<const UCHAR*>(buffer), length); // Append | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,6 +40,7 @@ | |
| #include "../common/classes/ImplementHelper.h" | ||
| #include "../common/dsc.h" | ||
| #include "../jrd/Resources.h" | ||
| #include "err_proto.h" | ||
|
|
||
| namespace Ods | ||
| { | ||
|
|
@@ -133,6 +134,14 @@ class blb : public pool_alloc<type_blb> | |
| return destination; | ||
| } | ||
|
|
||
| // Read from the specified position | ||
| FB_SIZE_T BLB_read(thread_db* tdbb, const offset_t position, void* buffer, const ULONG length); | ||
|
|
||
| // Write data at any position in a temporary (new) blob | ||
| // The position of the new buffer must start inside the blob range, but its length may extend beyond it | ||
| // Existing data will be overwritten | ||
| void BLB_write(thread_db* tdbb, const offset_t position, const void* buffer, ULONG length); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these two functions supposed to be a public API ? I see no changes in interfaces. |
||
|
|
||
| private: | ||
| static blb* allocate_blob(thread_db*, jrd_tra*); | ||
| static blb* copy_blob(thread_db* tdbb, const bid* source, bid* destination, | ||
|
|
@@ -142,6 +151,41 @@ class blb : public pool_alloc<type_blb> | |
| void insert_page(thread_db*); | ||
| void destroy(const bool purge_flag); | ||
|
|
||
| // Modify only existing data. Throw error on side violation | ||
| void modifyExistingData(thread_db* tdbb, offset_t position, const void* buffer, const ULONG length); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, am I missed a way to modify non-existing data ? :) |
||
|
|
||
| // Modify existing data | ||
| // Output: | ||
| // true: the input range is only inside the blob data | ||
| // false: the input range extends beyond existing data. Modifies `buffer` and `length` to return only non-written data | ||
| template<class BufferType, class SizeType> | ||
| requires((std::is_same_v<BufferType, void> || std::is_same_v<BufferType, UCHAR>) && std::is_integral_v<SizeType>) | ||
| bool modifyDataMoveBuffer(thread_db* tdbb, const offset_t position, const BufferType*& buffer, SizeType& length) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DataMoveBuffer ? What is it ? We already have MoveBuffer used in MOV\CVT, and it is completely different thing. Why template here ? Why it is inline routine ? |
||
| { | ||
| if (position > blb_length) | ||
| { | ||
| ERR_post(Firebird::Arg::Gds(isc_blob_out_of_length_write) << | ||
| Firebird::Arg::Int64(position) << Firebird::Arg::Int64(blb_length)); | ||
| } | ||
|
|
||
| const offset_t end = position + length; | ||
| if (end <= blb_length) | ||
| { | ||
| // Range is inside the current data, replace and report that no extra actions are required | ||
| modifyExistingData(tdbb, position, buffer, length); | ||
| return true; | ||
| } | ||
|
|
||
| // Part inside existing data | ||
| const offset_t middle = blb_length - position; | ||
| modifyExistingData(tdbb, position, buffer, middle); | ||
|
|
||
| // Return only part to append | ||
| buffer = reinterpret_cast<const BufferType*>(reinterpret_cast<const UCHAR*>(buffer) + middle); // Move pointer | ||
| length -= middle; | ||
| return false; | ||
| } | ||
|
|
||
| FB_SIZE_T blb_temp_size = 0; // size stored in transaction temp space | ||
| offset_t blb_temp_offset = 0; // offset in transaction temp space | ||
| Attachment* blb_attachment = nullptr; // database attachment | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`blob_write_out_of_bounds` or `blob_write_after_the_end`? Same for the message text.