@@ -96,11 +96,9 @@ Function GetValueType(ByRef bytes() As Byte, ByVal offset As Long) As PDF_ValueT
9696 Case "%"
9797 GetValueType = PDF_ValueType.PDF_Comment
9898
99- Case "e" , ">" ', "]"
99+ Case "e" , ">" '
100100 tmpStr = GetWord(bytes, offset)
101101 Select Case LCase(tmpStr)
102- ' Case "]"
103- ' GetValueType = PDF_ValueType.PDF_EndOfArray
104102 Case ">>"
105103 GetValueType = PDF_ValueType.PDF_EndOfDictionary
106104 Case "endstream"
@@ -451,8 +449,7 @@ Function GetDictionaryValue(ByRef Value As pdfValue, ByVal name As String) As pd
451449 If Value.Value.Exists(name) Then
452450 Set GetDictionaryValue = Value.Value.Item(name)
453451 Else
454- Set GetDictionaryValue = New pdfValue
455- GetDictionaryValue.valueType = PDF_ValueType.PDF_Null
452+ Set GetDictionaryValue = New pdfValue ' defaults to PDF_ValueType.PDF_Null
456453 End If
457454 Exit Function
458455errHandler:
@@ -640,6 +637,9 @@ Function ParseXrefTable(ByRef content() As Byte, ByRef offset As Long, ByRef tra
640637
641638 ' we need the uncompressed (un-/Filter'd) data
642639 Dim rawData() As Byte
640+ #If True Then
641+ rawData = objStream.udata
642+ #Else
643643 If objStream.Meta.Exists("/Filter" ) Then
644644 ' TODO support all /Filter types
645645 Dim filter As String
@@ -753,6 +753,7 @@ Function ParseXrefTable(ByRef content() As Byte, ByRef offset As Long, ByRef tra
753753 Else
754754 rawData = objStream.data
755755 End If
756+ #End If
756757
757758 Dim objOffset As Long
758759 objOffset = 0
@@ -849,7 +850,10 @@ errHandler:
849850End Function
850851
851852
852- Function getObject (ByRef content() As Byte , ByRef xrefTable As Dictionary , ByVal Index As Long ) As pdfValue
853+ ' extracts/parses a pdf object from the raw pdf content()
854+ ' because uncompressing in VBA can be slow, stream object streams should be cached
855+ ' sosCache is only used for objects embedded in a stream object stream, and only if one is provided (it may be Nothing)
856+ Function getObject (ByRef content() As Byte , ByRef xrefTable As Dictionary , ByVal Index As Long , ByRef sosCache As Dictionary ) As pdfValue
853857 On Error GoTo errHandler
854858 Dim obj As pdfValue
855859 If xrefTable.Exists(Index) Then
@@ -862,17 +866,26 @@ Function getObject(ByRef content() As Byte, ByRef xrefTable As Dictionary, ByVal
862866 Dim cntrObjEntry As xrefEntry
863867 Set cntrObjEntry = xrefTable.Item(entry.embedObjId)
864868 Dim cntrObj As pdfValue
865- Set cntrObj = getObject(content, xrefTable, entry.embedObjId)
866- Dim dict As Dictionary
867- Set dict = cntrObj.Value.Value.Meta
869+ ' try loading containing object (stream object stream) from cache before potentially uncompressing
870+ If Not sosCache Is Nothing Then
871+ If sosCache.Exists(entry.embedObjId) Then Set cntrObj = sosCache(entry.embedObjId)
872+ End If
873+ If cntrObj Is Nothing Then ' not in cache or no cache provided
874+ Set cntrObj = getObject(content, xrefTable, entry.embedObjId, sosCache)
875+ If Not sosCache Is Nothing Then Set sosCache(entry.embedObjId) = cntrObj ' add/update cache
876+ End If
868877
869878 ' extract our embedded object
870- Dim cbuf() As Byte , buffer() As Byte
871- Dim inOff As Long , outSize As Long , estBufSize As Long
872- cbuf = cntrObj.Value.Value.data
873- inOff = 2
874- If dict.Exists("/DL" ) Then estBufSize = CLng(dict.Item("/DL" ).Value) ' only a hint
875- If inflate2(cbuf, buffer, inOff, outSize, estBufSize) Then
879+ If cntrObj.Value.valueType <> PDF_ValueType.PDF_Stream Then
880+ Debug.Print "Error! expecting stream object stream!"
881+ Stop
882+ GoTo nullValue
883+ End If
884+ Dim streamObjectStream As pdfStream
885+ Set streamObjectStream = cntrObj.Value.Value
886+ Dim buffer() As Byte
887+ buffer = streamObjectStream.udata
888+ If (UBound(buffer) - LBound(buffer)) > 0 Then
876889 ' parse embedded object data
877890 ' buffer has N sets of obj id# <whitespace> offset
878891 ' immediately followed by objects' data, note: /First
@@ -883,6 +896,8 @@ Function getObject(ByRef content() As Byte, ByRef xrefTable As Dictionary, ByVal
883896 Dim embOffset As Long
884897 Dim i As Long
885898 Dim firstOffset As Long
899+ Dim dict As Dictionary
900+ Set dict = streamObjectStream.Meta
886901 If dict.Exists("/First" ) Then
887902 firstOffset = CLng(dict.Item("/First" ).Value)
888903 Else
@@ -927,7 +942,7 @@ Function getObject(ByRef content() As Byte, ByRef xrefTable As Dictionary, ByVal
927942 obj.valueType = PDF_ValueType.PDF_Object
928943 Set obj.Value = GetValue(buffer, embOffset)
929944 Else
930- Debug.Print "Error inflating embedded object!"
945+ Debug.Print "Error reading embedded object!"
931946 Stop
932947 End If
933948 Else
@@ -956,7 +971,7 @@ Function GetRootObject(ByRef content() As Byte, ByRef trailer As pdfValue, ByRef
956971 ' get either reference or /Root object itself
957972 Set root = GetRoot(trailer)
958973 If root.valueType = PDF_ValueType.PDF_Reference Then
959- Set root = getObject(content, xrefTable, root.Value)
974+ Set root = getObject(content, xrefTable, root.Value, Nothing )
960975 'ElseIf root.valueType = PDF_ValueType.PDF_Object Then
961976 End If
962977
@@ -976,7 +991,7 @@ Function GetInfoObject(ByRef content() As Byte, ByRef trailer As pdfValue, ByRef
976991 ' get either reference or /Info object itself
977992 Set Info = GetInfo(trailer)
978993 If Info.valueType = PDF_ValueType.PDF_Reference Then
979- Set Info = getObject(content, xrefTable, Info.Value)
994+ Set Info = getObject(content, xrefTable, Info.Value, Nothing )
980995 'ElseIf info.valueType = PDF_ValueType.PDF_Object Then
981996 End If
982997
@@ -991,7 +1006,7 @@ End Function
9911006
9921007
9931008' updates objects Dictionary with all objects under root node, indexed by object id, i.e. loads a chunk of the PDF document
994- Sub GetObjectsInTree (ByRef root As pdfValue , ByRef content() As Byte , ByRef xrefTable As Dictionary , ByRef objects As Dictionary )
1009+ Sub GetObjectsInTree (ByRef root As pdfValue , ByRef content() As Byte , ByRef xrefTable As Dictionary , ByRef objects As Dictionary , ByRef sosCache As Dictionary )
9951010 On Error GoTo errHandler
9961011 Dim obj As pdfValue
9971012 Dim v As Variant
@@ -1003,27 +1018,27 @@ Sub GetObjectsInTree(ByRef root As pdfValue, ByRef content() As Byte, ByRef xref
10031018 Case PDF_ValueType.PDF_Array
10041019 For Each v In root.Value
10051020 Set obj = v
1006- GetObjectsInTree obj, content, xrefTable, objects
1021+ GetObjectsInTree obj, content, xrefTable, objects, sosCache
10071022 Next v
10081023 Case PDF_ValueType.PDF_Dictionary
10091024 For Each v In root.Value.Items
10101025 Set obj = v
1011- GetObjectsInTree obj, content, xrefTable, objects
1026+ GetObjectsInTree obj, content, xrefTable, objects, sosCache
10121027 Next v
10131028 Case PDF_ValueType.PDF_Object
1014- GetObjectsInTree root.Value, content, xrefTable, objects
1029+ GetObjectsInTree root.Value, content, xrefTable, objects, sosCache
10151030 Case PDF_ValueType.PDF_Reference
10161031 ' we need to load object
10171032 If Not objects.Exists(CLng(root.Value)) Then
1018- Set obj = getObject(content, xrefTable, root.Value)
1033+ Set obj = getObject(content, xrefTable, root.Value, sosCache )
10191034 objects.Add CLng(root.Value), obj
1020- GetObjectsInTree obj, content, xrefTable, objects
1035+ GetObjectsInTree obj, content, xrefTable, objects, sosCache
10211036 End If
10221037 Case PDF_ValueType.PDF_Stream
10231038 Dim stream As pdfStream
10241039 Set stream = root.Value
1025- GetObjectsInTree stream.stream_meta, content, xrefTable, objects
1026- GetObjectsInTree stream.stream_data, content, xrefTable, objects
1040+ GetObjectsInTree stream.stream_meta, content, xrefTable, objects, sosCache
1041+ GetObjectsInTree stream.stream_data, content, xrefTable, objects, sosCache
10271042 Case PDF_ValueType.PDF_StreamData
10281043 ' Nothing to do
10291044 Case PDF_ValueType.PDF_Trailer
0 commit comments