Skip to content

Commit ad1595d

Browse files
committed
improve Outlines (bookmarks) created when combining multiple PDFs, other minor updates
1 parent 96c5575 commit ad1595d

File tree

11 files changed

+694
-184
lines changed

11 files changed

+694
-184
lines changed

dist/CombinePDF.xlsm

28.7 KB
Binary file not shown.

src/pdfLib.xlsm/Class Modules/pdfDocument.cls

Lines changed: 530 additions & 121 deletions
Large diffs are not rendered by default.

src/pdfLib.xlsm/Class Modules/pdfStream.cls

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ Public Property Get Length() As Long
2323
' Length is stored in meta
2424
' it should be same as size of data
2525
If Meta.Exists("/Length") Then
26-
Length = CLng(Meta.Item("/Length").value)
26+
Length = CLng(Meta.item("/Length").value)
2727
End If
2828

2929
On Error Resume Next
@@ -38,17 +38,17 @@ Public Property Get Length() As Long
3838
End Property
3939

4040
' this will add/update /Length property in meta and resize data()
41-
Public Property Let Length(ByVal Count As Long)
42-
Meta("/Length").value = Count ' add or replace
41+
Public Property Let Length(ByVal count As Long)
42+
Meta("/Length").value = count ' add or replace
4343

4444
On Error Resume Next
4545
Dim dataLen As Long
4646
dataLen = UBound(data) - LBound(data)
4747
On Error GoTo 0
4848
If Err.Number = 9 Then ' array out of bounds, i.e. not initialized
49-
ReDim data(0 To Count)
50-
ElseIf dataLen <> Count Then ' no need to adjust size if unchanged (ie just setting meta /Length property
51-
ReDim Preserve data(0 To Count) ' keep any data there, allows extending
49+
ReDim data(0 To count)
50+
ElseIf dataLen <> count Then ' no need to adjust size if unchanged (ie just setting meta /Length property
51+
ReDim Preserve data(0 To count) ' keep any data there, allows extending
5252
End If
5353
End Property
5454

@@ -66,13 +66,13 @@ Public Property Get udata() As Byte()
6666
If Meta.Exists("/Filter") Then
6767
' TODO support all /Filter types
6868
Dim filter As String
69-
filter = Meta.Item("/Filter").value
69+
filter = Meta.item("/Filter").value
7070
Select Case LCase(filter)
7171
Case "/flatedecode"
7272
Dim startIndex As Long: startIndex = 2 ' skip past zlib wrapper to raw Deflate data
7373
Dim outSize As Long ' as count of bytes -- should be 0 on entry so will be size on output
7474
Dim estBufSize As Long ' if known, uncompressed size
75-
If Meta.Exists("/DL") Then estBufSize = CLng(Meta.Item("/DL").value) ' only a hint
75+
If Meta.Exists("/DL") Then estBufSize = CLng(Meta.item("/DL").value) ' only a hint
7676
Erase m_udata
7777

7878
' Note: libdeflate_inflate arguments are byVal, we expect them be byRef
@@ -88,16 +88,16 @@ Public Property Get udata() As Byte()
8888
predictor = 1: columns = 1
8989
If Meta.Exists("/DecodeParms") Then
9090
Dim pdfV As pdfValue
91-
Set pdfV = Meta.Item("/DecodeParms")
91+
Set pdfV = Meta.item("/DecodeParms")
9292
Dim decodeParms As Dictionary
9393
Set decodeParms = pdfV.value
9494
If decodeParms.Exists("/Predictor") Then
95-
Set pdfV = decodeParms.Item("/Predictor")
95+
Set pdfV = decodeParms.item("/Predictor")
9696
predictor = pdfV.value
9797
End If
9898
' should only be supplied if predictor > 1, but we can load value regardless, only used if predictor > 1
9999
If decodeParms.Exists("/Columns") Then
100-
Set pdfV = decodeParms.Item("/Columns")
100+
Set pdfV = decodeParms.item("/Columns")
101101
columns = pdfV.value
102102
End If
103103
Set pdfV = Nothing

src/pdfLib.xlsm/Class Modules/pdfValue.cls

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,15 @@ Public value As Variant
1818
' to help maintain id across indirect references, PDF_Object provides a PDF_Reference object to be used by others
1919
Private m_referenceObj As pdfValue
2020

21+
' used to help encode certain information about how to represent
22+
Public Enum ValueFlags
23+
flgNone = 0
24+
flgUtf8 = 1
25+
flgBinary = 2
26+
End Enum
27+
28+
Public flags As ValueFlags
29+
2130

2231
' keep our referencObj in sync
2332
Public Property Let id(ByVal newId As Long)
@@ -122,7 +131,7 @@ Private Function pdfStringToBytes(ByRef str As String, Optional ByVal asHex As B
122131
' we need to check if any Unicode values and if so we encode in UTF-8
123132
' to avoid more expensive check if >255 value used, we simply always encode as UTF-8 unless ASCII (all values < 128)
124133
ucBytes = StringToUtf8Bytes(str, BOM:=False) ' Note: if only ASCII then equivalent to UTF-8 byte array
125-
If UBound(ucBytes) > strLen Then ucBytes = AddUtf8BOM(ucBytes) ' something >128 and had to be expanded to multiple bytes
134+
If UBound(ucBytes) > strLen Then ucBytes = addUtf8BOM(ucBytes) ' something >128 and had to be expanded to multiple bytes
126135

127136
Dim ndx As Long
128137
' now determine if we are returning bytes as (string) or <## ## ##> format
@@ -186,7 +195,7 @@ Public Function serialize(Optional ByVal baseId As Long = 0) As Byte()
186195
Case PDF_ValueType.PDF_Null
187196
objStr = "null"
188197
Case PDF_ValueType.PDF_Name
189-
objStr = value.value
198+
objStr = pdfDocument.EscapeName(value.value, value.flags And flgUtf8) ' see below where Dictionary Key Name encoded
190199
Case PDF_ValueType.PDF_Boolean
191200
If value.value Then
192201
objStr = "true"
@@ -222,12 +231,12 @@ Public Function serialize(Optional ByVal baseId As Long = 0) As Byte()
222231
If Not firstPass Then objStr = objStr & vbLf
223232
firstPass = False
224233
Dim key As String
225-
If TypeName(v) = "String" Then
234+
If typeName(v) = "String" Then
226235
key = CStr(v)
227-
Else ' assume pdfValue with valueType=PDF_Name
228-
key = CStr(v.value)
236+
Else 'If typeName(v) = "pdfValue" Then ' assume pdfValue with valueType=PDF_Name
237+
key = pdfDocument.EscapeName(v.value, v.flags And flgUtf8)
229238
End If
230-
Set pv = dict.Item(key)
239+
Set pv = dict.item(v) ' v may be pdfValue or String version of key
231240
objStr = objStr & key & " "
232241
objStr = objStr & BytesToString(pv.serialize(baseId))
233242
Next v
@@ -285,8 +294,10 @@ End Function
285294
' can be used by predeclared object, e.g. pdfValue.NewValueObj("/MyName", "/Name")
286295

287296
' returns a name as a pdfValue /Name obj
288-
Function NewNameValue(ByVal name As String) As pdfValue
297+
Function NewNameValue(ByVal name As String, Optional ByVal utf8 As Boolean = False) As pdfValue
298+
If Left$(name, 1) <> "/" Then name = "/" & name
289299
Set NewNameValue = NewValue(name, "/Name")
300+
If utf8 Then NewNameValue.flags = flgUtf8
290301
End Function
291302

292303

@@ -307,12 +318,12 @@ Function NewValue(ByRef value As Variant, Optional ByRef valueType As String = v
307318
Select Case VarType(value)
308319
Case vbNull
309320
' always return same instance for Null
310-
Static NullValue As pdfValue
311-
If NullValue Is Nothing Then
321+
Static nullValue As pdfValue
322+
If nullValue Is Nothing Then
312323
obj.valueType = PDF_ValueType.PDF_Null
313-
Set NullValue = obj
324+
Set nullValue = obj
314325
Else
315-
Set obj = NullValue
326+
Set obj = nullValue
316327
End If
317328
Case vbLong, vbInteger
318329
obj.valueType = PDF_ValueType.PDF_Integer
@@ -335,7 +346,7 @@ Function NewValue(ByRef value As Variant, Optional ByRef valueType As String = v
335346
obj.value = CStr(value)
336347
End Select
337348
Case vbObject
338-
Select Case TypeName(value)
349+
Select Case typeName(value)
339350
Case "Dictionary"
340351
obj.valueType = PDF_ValueType.PDF_Dictionary
341352
Set obj.value = value
0 Bytes
Binary file not shown.

src/pdfLib.xlsm/Forms/ufPdfInfo.frm

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ Private Sub UpdateInfo()
5151
Dim v As Variant
5252
Dim obj As pdfValue
5353
For Each v In dict.Keys
54-
lbInfo.AddItem CStr(v) & "=" & dict.Item(v).value
54+
lbInfo.AddItem CStr(v) & "=" & dict.item(v).value
5555
Next v
5656
End If
5757
End Sub
@@ -60,6 +60,23 @@ Private Sub UpdatePages()
6060
lblPageCount = pdfDoc.pageCount
6161
End Sub
6262

63+
Private Sub UpdateNamedDestinations()
64+
If pdfDoc.Dests.valueType = PDF_ValueType.PDF_Dictionary Then
65+
Dim dict As Dictionary: Set dict = pdfDoc.Dests.asDictionary()
66+
If Not dict Is Nothing Then
67+
Dim v As Variant
68+
Dim obj As pdfValue
69+
For Each v In dict.Keys
70+
Set obj = dict(v)
71+
lbNamedDestinations.AddItem CStr(v) & ":" & BytesToString(obj.serialize())
72+
Next v
73+
Set v = Nothing
74+
End If
75+
Set dict = Nothing
76+
End If
77+
End Sub
78+
79+
6380

6481
Private Sub UserForm_Activate()
6582
' auto prompt for file to load if not passed programatically beforehand
@@ -75,5 +92,6 @@ Private Sub UserForm_Activate()
7592
lblFilename.Caption = pdfDoc.filepath & pdfDoc.filename
7693
UpdateInfo
7794
UpdatePages
95+
UpdateNamedDestinations
7896
End If
7997
End Sub
0 Bytes
Binary file not shown.

src/pdfLib.xlsm/Modules/Main.bas

Lines changed: 67 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ End Sub
1717
Public Sub CombinePDFs(ByRef sourceFiles() As String, ByRef outFile As String)
1818
Dim combinedPdfDoc As pdfDocument: Set combinedPdfDoc = New pdfDocument
1919
' initialize with some basic structures
20-
combinedPdfDoc.AddInfo
21-
combinedPdfDoc.AddPages
22-
combinedPdfDoc.AddOutlines
23-
With combinedPdfDoc.rootCatalog.asDictionary()
24-
Set .Item("/PageMode") = pdfValue.NewNameValue("/UseOutlines") ' default is "/UseNone"
20+
With combinedPdfDoc
21+
.AddInfo
22+
.AddPages
23+
.AddOutlines
24+
With .rootCatalog.asDictionary()
25+
Set .item("/PageMode") = pdfValue.NewNameValue("/UseOutlines") ' default is "/UseNone"
26+
End With
2527
End With
2628

2729
Dim offset As Long
@@ -42,16 +44,53 @@ Public Sub CombinePDFs(ByRef sourceFiles() As String, ByRef outFile As String)
4244
pdfDoc.parsePdf
4345

4446
' adjust obj id's so no conflict with previously stored ones
45-
pdfDoc.renumberIds baseId
47+
pdfDoc.renumberIds baseId + 1 ' +1 as baseId used for our /OutlineItem
4648

49+
' add the first page of this document as a Named Destination to our combined pdf
50+
' Note: we assume current pageCount is how many existing pages there are, with
51+
' the next page being 1st of just loaded document, then subtract 1 as page# begins with 0
52+
Dim docDestinationName As pdfValue
53+
Set docDestinationName = pdfValue.NewNameValue("/" & pdfDoc.Title & ".1", utf8:=True)
54+
Dim docDestination As pdfValue
55+
Set docDestination = combinedPdfDoc.NewDestination(combinedPdfDoc.pageCount, PDF_FIT.PDF_FIT)
56+
combinedPdfDoc.AddNamedDestinations docDestinationName, docDestination
57+
58+
' copy over any pre-existing Named Destinations
59+
' Note: these are in a <<dictionary>> in /Root/Pages so not automatically included
60+
If pdfDoc.Dests.valueType = PDF_ValueType.PDF_Dictionary Then
61+
Dim dict As Dictionary: Set dict = pdfDoc.Dests.asDictionary()
62+
If Not dict Is Nothing Then
63+
Dim v As Variant
64+
For Each v In dict.Keys
65+
' we treat name as PDF_String instead of PDF_Name to avoid potential double escaping
66+
' and because while PDF_Name recommended, not required so could just be a PDF_String
67+
Dim name As pdfValue: Set name = pdfValue.NewValue(v)
68+
combinedPdfDoc.AddNamedDestinations name, dict(v)
69+
Next v
70+
Set v = Nothing
71+
End If
72+
Set dict = Nothing
73+
End If
74+
4775
' we inject a new top level /Pages object which we add all the document /Pages to
4876
' so we need to add this /Pages to our top level /Pages and add a /Parent indirect reference
49-
combinedPdfDoc.AddPages pdfDoc.pages
77+
combinedPdfDoc.AddPages pdfDoc.Pages
5078

5179
' we need to copy/merge some optional fields such as /Outline for bookmarks
52-
If pdfDoc.rootCatalog.hasKey("/Outlines") Then
80+
If False And pdfDoc.rootCatalog.hasKey("/Outlines") Then
81+
' Note: these are objs in pdfDoc so we need to remove if we add equivalent ones to combindedPdfDoc to avoid duplicates
5382
' hack for now, just copy over
54-
combinedPdfDoc.rootCatalog.asDictionary().Add "/Outlines", pdfDoc.rootCatalog.asDictionary().Item("/Outlines")
83+
'combinedPdfDoc.rootCatalog.asDictionary().Add "/Outlines", pdfDoc.rootCatalog.asDictionary().item("/Outlines")
84+
' TODO
85+
Else
86+
' defaults should include /First /Last along with /Count, /Title /Prev /Next and optionally /Dest
87+
Dim defaults As Dictionary: Set defaults = New Dictionary
88+
Set defaults("/Title") = pdfValue.NewValue(pdfDoc.Title)
89+
Set defaults("/Dest") = docDestinationName
90+
Dim outlineItem As pdfValue
91+
Set outlineItem = combinedPdfDoc.NewOutlineItem(combinedPdfDoc.Outlines, defaults)
92+
combinedPdfDoc.AddOutlines outlineItem
93+
' don't save here as we may need to adjust /Prev & /Next values
5594
End If
5695

5796
' remove /Root object and ensure only left with 1 /Root
@@ -67,13 +106,28 @@ Public Sub CombinePDFs(ByRef sourceFiles() As String, ByRef outFile As String)
67106

68107
' determine highest id used, 1st obj in next file will start at this + 1
69108
' Note: we need to use pdfDoc.xrefTable's size and not combinedPdfDoc.xrefTable as we are reserving full count from just loaded pdf document
70-
baseId = baseId + pdfDoc.xrefTable.Count - 1 ' highest id possible so far
109+
baseId = baseId + pdfDoc.xrefTable.count - 1 ' highest id possible so far
71110
combinedPdfDoc.nextObjId = baseId + 1
72111
DoEvents
73112
Next ndx
74113

114+
combinedPdfDoc.SavePdfObject outputFileNum, combinedPdfDoc.Outlines, offset
115+
If combinedPdfDoc.Outlines.hasKey("/First") Then
116+
Dim outlineObj As pdfValue
117+
Set outlineObj = combinedPdfDoc.Outlines.asDictionary().item("/First")
118+
Set outlineObj = combinedPdfDoc.getObject(outlineObj.value, outlineObj.generation)
119+
Do While Not outlineObj Is Nothing
120+
combinedPdfDoc.SavePdfObject outputFileNum, outlineObj, offset
121+
If outlineObj.hasKey("/Next") Then
122+
Set outlineObj = outlineObj.asDictionary().item("/Next")
123+
Set outlineObj = combinedPdfDoc.getObject(outlineObj.value, outlineObj.generation)
124+
Else
125+
Set outlineObj = Nothing
126+
End If
127+
Loop
128+
End If
75129
' save updated /Pages object (but not nested objects as already saved)
76-
combinedPdfDoc.SavePdfObject outputFileNum, combinedPdfDoc.pages, offset
130+
combinedPdfDoc.SavePdfObject outputFileNum, combinedPdfDoc.Pages, offset
77131
combinedPdfDoc.SavePdfObject outputFileNum, combinedPdfDoc.rootCatalog, offset
78132

79133
' writes out trailer and cross reference table
@@ -101,9 +155,9 @@ Function PickFiles(Optional ByVal AllowMultiSelect As Boolean = True) As String(
101155
If .Show = -1 Then
102156
'Step through each string in the FileDialogSelectedItems collection.
103157
Dim files() As String
104-
ReDim files(0 To .SelectedItems.Count - 1)
158+
ReDim files(0 To .SelectedItems.count - 1)
105159
Dim ndx As Long
106-
For ndx = 1 To .SelectedItems.Count
160+
For ndx = 1 To .SelectedItems.count
107161
files(ndx - 1) = .SelectedItems(ndx)
108162
Next ndx
109163
PickFiles = files

src/pdfLib.xlsm/Modules/Tests.bas

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ Sub TestProblemPdfs()
4545
Debug.Print "Error parsing pdf " & pdfDoc.filename
4646
End If
4747

48-
Debug.Print pdfDoc.pages.asDictionary.Item("/Count").value
48+
Debug.Print pdfDoc.Pages.asDictionary.item("/Count").value
4949

5050
Dim obj As pdfValue
5151
Set obj = pdfDoc.getObject(69, 0)
@@ -110,7 +110,7 @@ Sub TestZip()
110110
Debug.Print Err.Description & " (" & Err.Number & ")"
111111
Else
112112
For i = LBound(fileContent) To UBound(fileContent)
113-
Debug.Print Chr(fileContent(i));
113+
Debug.Print chr(fileContent(i));
114114
If fileContent(i) = 10 Then Debug.Print ""
115115
Next i
116116
End If

src/pdfLib.xlsm/Modules/pdfParseAndGetValues.bas

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ End Function
8888
Function GetDictionaryValue(ByRef value As pdfValue, ByVal name As String) As pdfValue
8989
On Error GoTo errHandler
9090
If value.value.Exists(name) Then
91-
Set GetDictionaryValue = value.value.Item(name)
91+
Set GetDictionaryValue = value.value.item(name)
9292
Else
9393
Set GetDictionaryValue = New pdfValue ' defaults to PDF_ValueType.PDF_Null
9494
End If

0 commit comments

Comments
 (0)