@@ -392,48 +392,97 @@ End Sub
392392
393393
394394' updates id of all objects under root pdfValue beginning at baseId
395- ' Note: use trailer as root to renumber whole document
395+ ' Note: if root is Nothing then will update whole document including caches
396396' Warning: assumes each obj id refers to same obj and objReference instances
397- Public Function renumberIds (ByRef root As pdfValue , ByVal baseId As Long , Optional ByRef visited As Dictionary = Nothing ) As Long
397+ Public Function renumberIds (ByVal baseId As Long , Optional ByRef root As pdfValue = Nothing , Optional ByRef visited As Dictionary = Nothing ) As Long
398398 On Error GoTo errHandler
399- If visited Is Nothing Then Set visited = New Dictionary
400399 Dim v As Variant
401400 Dim obj As pdfValue
402- ' sanity check
403- If root Is Nothing Then Exit Function
401+ Dim fixCache As Boolean
402+
403+ ' renumber whole tree?
404+ If root Is Nothing Then
405+ fixCache = True
406+ Set root = trailer
407+ End If
408+
409+ ' should be Nothing on first call, we use this to avoid recursion and thus renumber already renumbered objs
410+ If visited Is Nothing Then
411+ Set visited = New Dictionary
412+ End If
413+
404414 ' don't renumber same object more than once
405415 If root.id <> 0 Then
406- If visited.Exists(root.id ) Then
416+ If visited.Exists(root) Then
407417 renumberIds = baseId
408418 Exit Function
409419 End If
410420 ' and actually do the renumbering
411421 root.id = baseId
412422 baseId = baseId + 1
413- visited.Add root.id, root.id
423+ ' Note: we use actual object and not id here as temp there may be multiple objs with same id
424+ ' e.g. obj 1 renumbered to 4, and there is still an object 4 not yet renumbered
425+ visited.Add root, root
414426 End If
415427 ' update any children
416428 Select Case root.valueType
417429 Case PDF_ValueType.PDF_Array
418430 For Each v In root.value
419431 Set obj = v
420- baseId = renumberIds(obj, baseId , visited)
432+ baseId = renumberIds(baseId, obj , visited)
421433 Next v
422434 Case PDF_ValueType.PDF_Dictionary
423435 For Each v In root.value.Items
424436 Set obj = v
425- baseId = renumberIds(obj, baseId , visited)
437+ baseId = renumberIds(baseId, obj , visited)
426438 Next v
427439 Case PDF_ValueType.PDF_Reference
428440 Set obj = getObject(root.value, 0 )
429- baseId = renumberIds(obj, baseId , visited)
441+ baseId = renumberIds(baseId, obj , visited)
430442 Case PDF_ValueType.PDF_Trailer, PDF_ValueType.PDF_Object
431- baseId = renumberIds(root.value, baseId , visited)
443+ baseId = renumberIds(baseId, root.value, visited)
432444 Case Else
433445 ' no nested objects that need id's updated
434446 End Select
435447 ' last id used, or same as provided if nothing updated
436448 renumberIds = baseId
449+
450+ ' because we renumbered everything, we need to invalidate or rebuild our cache's
451+ ' but only from top level call
452+ ' note: because we have renumbered, we also invalidate our content buffer as it refers to old ids
453+ If fixCache Then
454+ 'Set visited = Nothing ' this is ByRef so only set to Nothing if originally Nothing
455+
456+ If ByteArraySize(content) >= 0 Then Erase content
457+ Dim newDict As Dictionary
458+ If Not objectCache Is Nothing Then
459+ Set newDict = New Dictionary
460+ For Each v In objectCache.Items
461+ Set obj = v
462+ If obj.id <> 0 Then Set newDict(obj.id) = obj
463+ Next v
464+ Set objectCache = newDict
465+ Set newDict = Nothing
466+ End If
467+
468+ If Not refCache Is Nothing Then
469+ Set newDict = New Dictionary
470+ For Each v In refCache.Items
471+ Set obj = v
472+ With obj
473+ If .id <> 0 Then
474+ Set newDict(.id & "_" & .generation) = obj
475+ Set newDict(.id & " " & .generation & " R" ) = obj.referenceObj
476+ End If
477+ End With
478+ Next v
479+ Set refCache = newDict
480+ Set newDict = Nothing
481+ End If
482+
483+ Set sosCache = Nothing
484+ End If
485+
437486 Exit Function
438487errHandler:
439488 Debug.Print "Error: " & Err.Description & " (" & Err.Number & ")"
@@ -453,7 +502,7 @@ Public Function getObject(ByVal id As Long, ByVal generation As Long, Optional B
453502 Set obj = getCachedObject(id, generation)
454503 ' if it wasn't there then try loading from content()
455504 If obj.valueType = PDF_ValueType.PDF_Null Then
456- Set obj = loadObject(content, xrefTable, id)
505+ Set obj = loadObject(id)
457506 If obj Is Nothing Then Set obj = New pdfValue
458507
459508 ' if successfully loaded, update our object cache
@@ -1325,7 +1374,6 @@ Function GetValue(ByRef bytes() As Byte, ByRef offset As Long, Optional ByRef Me
13251374 Case PDF_ValueType.PDF_Reference, PDF_ValueType.PDF_Object
13261375 Dim words(0 To 2 ) As String
13271376 words(0 ) = GetWord(bytes, offset)
1328- If IsMatch(words(0 ), "3" ) Then Stop
13291377 offset = SkipWhiteSpace(bytes, offset)
13301378 words(1 ) = GetWord(bytes, offset)
13311379 offset = SkipWhiteSpace(bytes, offset)
@@ -1446,19 +1494,33 @@ End Function
14461494
14471495
14481496' extracts/parses pdf object from raw pdf content()
1497+ ' uses xrefTable to determine offset directly or obtain using cross reference stream
14491498' due to potential slowness uncompressing in VBA, stream object streams should be cached
14501499' the sosCache is only used for stream object streams and only if provided
14511500' the refCache is to ensure only 1 indirect reference object instance is created for each object
14521501' this will use provided VBA reference object instance instead of creating a new [different] one
1453- Function loadObject (ByRef content() As Byte , ByRef xrefTable As Dictionary , ByVal Index As Long ) As pdfValue
1502+ ' forceReload bypasses refCache and returns a new VBA object instance ***
1503+ ' *** changes to this object's id may not effect any other object nor their reference values
1504+ ' *** if object previously loaded returns a new instance, otherwise results same as if is False
1505+ ' *** possibly useful to duplicate an object to be mutated and added, but use with care
1506+ Function loadObject (ByVal Index As Long , Optional forceReload As Boolean = False ) As pdfValue
14541507 On Error GoTo errHandler
14551508 Dim obj As pdfValue
1509+
1510+ ' speed up, ensure single object returned, and avoid issues when objects are removed from current objectCache
1511+ If (Not forceReload) And (Not refCache Is Nothing ) Then
1512+ If refCache.Exists(Index) Then
1513+ Set loadObject = refCache(Index)
1514+ Exit Function
1515+ End If
1516+ End If
1517+
14561518 If xrefTable.Exists(Index) Then
14571519 Dim entry As xrefEntry
14581520 Set entry = xrefTable.Item(Index)
14591521 If entry.isFree Or ((Not entry.isEmbeded) And (entry.offset <= 0 )) Then GoTo NullValue
14601522 Dim offset As Long : offset = entry.offset
1461- If offset > UBound (content) Then GoTo NullValue
1523+ If offset > ByteArraySize (content) Then GoTo NullValue
14621524 If entry.isEmbeded Then
14631525 Dim cntrObjEntry As xrefEntry
14641526 Set cntrObjEntry = xrefTable.Item(entry.embedObjId)
@@ -1468,7 +1530,7 @@ Function loadObject(ByRef content() As Byte, ByRef xrefTable As Dictionary, ByVa
14681530 If sosCache.Exists(entry.embedObjId) Then Set cntrObj = sosCache(entry.embedObjId)
14691531 End If
14701532 If cntrObj Is Nothing Then ' not in cache or no cache provided
1471- Set cntrObj = loadObject(content, xrefTable, entry.embedObjId)
1533+ Set cntrObj = loadObject(entry.embedObjId)
14721534 If Not sosCache Is Nothing Then Set sosCache(entry.embedObjId) = cntrObj ' add/update cache
14731535 End If
14741536
@@ -1567,7 +1629,7 @@ Function GetRootObject(ByRef content() As Byte, ByRef trailer As pdfValue, ByRef
15671629 ' get either reference or /Root object itself
15681630 Set root = GetRoot(trailer)
15691631 If root.valueType = PDF_ValueType.PDF_Reference Then
1570- Set root = loadObject(content, xrefTable, root.value)
1632+ Set root = loadObject(root.value)
15711633 'ElseIf root.valueType = PDF_ValueType.PDF_Object Then
15721634 End If
15731635
@@ -1607,7 +1669,7 @@ Sub GetObjectsInTree(ByRef root As pdfValue, ByRef content() As Byte, ByRef xref
16071669 Case PDF_ValueType.PDF_Reference
16081670 ' we need to load object
16091671 If Not objects.Exists(CLng(root.value)) Then
1610- Set obj = loadObject(content, xrefTable, root.value)
1672+ Set obj = loadObject(root.value)
16111673 objects.Add CLng(root.value), obj
16121674 GetObjectsInTree obj, content, xrefTable, objects
16131675 End If
0 commit comments