Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions src/DeterministicIoPackaging/Patching/SheetPatcher.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
class SheetPatcher(SheetRelationshipPatcher relsPatcher) : IPatcher
{
static XNamespace xr = "http://schemas.microsoft.com/office/spreadsheetml/2014/revision";
static XNamespace r = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
static XName xName = xr + "uid";
static XName rId = r + "id";

public void PatchXml(XDocument xml, string entryName)
{
Expand All @@ -14,6 +16,104 @@ public void PatchXml(XDocument xml, string entryName)
{
RelationshipRenumber.RemapIds(xml, mapping);
}

if (relsPatcher.TargetMappings.TryGetValue(sheetName, out var targets) && targets.Count > 0)
{
NormalizeInterchangeableIds(xml, targets);
}
}

// When multiple relationships share the same target (e.g. two hyperlinks to the same URL),
// the DeterministicId assignment depends on the non-deterministic original order.
// Normalize by redistributing each group's DeterministicIds over the elements that
// reference them: the lowest id goes to the element with the lowest "ref" attribute
// (cell reference like "B2"); elements whose "ref" is equal or missing are ordered by
// their position in the document.
//
// xml:     the sheet document; matching r:id attribute values are rewritten in place.
// targets: DeterministicId -> relationship target for this sheet.
//
// Both orderings are ordinal string comparisons ("B10" sorts before "B2" and
// "DeterministicId10" before "DeterministicId2"); only repeatability matters here.
static void NormalizeInterchangeableIds(XDocument xml, Dictionary<string, string> targets)
{
    // Ids whose target is shared with at least one other id (the ambiguous case).
    var interchangeableIds = targets
        .GroupBy(_ => _.Value, _ => _.Key)
        .Where(_ => _.Count() > 1)
        .SelectMany(_ => _)
        .ToHashSet();

    if (interchangeableIds.Count == 0)
    {
        return;
    }

    // Collect every r:id attribute that references an interchangeable id, remember its
    // document-order position, and group by target. ToLookup is eager, so the grouping
    // is fully built before any attribute value is rewritten below.
    var referencesByTarget = xml.Descendants()
        .Attributes(rId)
        .Where(_ => interchangeableIds.Contains(_.Value))
        .Select((attribute, position) => (attribute, position))
        .ToLookup(_ => targets[_.attribute.Value]);

    foreach (var group in referencesByTarget)
    {
        // Order by the parent's ref attribute, then by document position.
        // The tie-break must not use the current r:id value: that value is exactly
        // the non-deterministic input being normalized.
        var ordered = group
            .OrderBy(_ => _.attribute.Parent?.Attribute("ref")?.Value ?? "", StringComparer.Ordinal)
            .ThenBy(_ => _.position)
            .Select(_ => _.attribute)
            .ToList();

        if (ordered.Count <= 1)
        {
            continue;
        }

        // The ids actually in use by this group, in ascending ordinal order.
        var sortedIds = ordered.Select(_ => _.Value).Order(StringComparer.Ordinal).ToList();

        // Hand them out in element order.
        for (var i = 0; i < ordered.Count; i++)
        {
            ordered[i].SetValue(sortedIds[i]);
        }
    }
}

public bool IsMatch(Entry entry)
Expand Down
15 changes: 15 additions & 0 deletions src/DeterministicIoPackaging/Patching/SheetRelationshipPatcher.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
class SheetRelationshipPatcher : IPatcher
{
// oldId → newId (DeterministicIdN) per sheet
internal Dictionary<string, Dictionary<string, string>> IdMappings { get; } = [];

// DeterministicIdN → target URL per sheet.
// Used by SheetPatcher to normalize interchangeable IDs when multiple
// relationships share the same target (e.g. two hyperlinks to the same URL).
internal Dictionary<string, Dictionary<string, string>> TargetMappings { get; } = [];

// Matches entries named "xl/worksheets/_rels/<name>.xml.rels".
// Compared ordinally: entry names are identifiers, so culture-sensitive
// prefix/suffix matching is not wanted.
public bool IsMatch(Entry entry) =>
    entry.FullName.StartsWith("xl/worksheets/_rels/", StringComparison.Ordinal) &&
    entry.FullName.EndsWith(".xml.rels", StringComparison.Ordinal);
Expand All @@ -16,6 +22,15 @@ public void PatchXml(XDocument xml, string entryName)
.Replace("xl/worksheets/_rels/", "")
.Replace(".rels", "");
IdMappings[sheetName] = mapping;

// Build DeterministicId → target lookup from the renumbered rels
var targets = new Dictionary<string, string>();
foreach (var rel in xml.Root!.Elements())
{
targets[rel.Attribute("Id")!.Value] = rel.Attribute("Target")!.Value;
}

TargetMappings[sheetName] = targets;
}
}
}
2 changes: 1 addition & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<Project>
<PropertyGroup>
<NoWarn>CS1591;CS0649;CA1416;NU1608;NU1109;NU1510</NoWarn>
<Version>0.24.0</Version>
<Version>0.24.1</Version>
<LangVersion>preview</LangVersion>
<AssemblyVersion>1.0.0</AssemblyVersion>
<Description>Modify System.IO.Packaging (https://learn.microsoft.com/en-us/dotnet/api/system.io.packaging) files to ensure they are deterministic. Helpful for testing, build reproducibility, security verification, and ensuring package integrity across different build environments.</Description>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheetData>
<row r="2">
<c r="B2" t="inlineStr">
<is>
<t>Google</t>
</is>
</c>
<c r="C2" t="inlineStr">
<is>
<t>GitHub</t>
</is>
</c>
<c r="E2" t="inlineStr">
<is>
<t>Google Link</t>
</is>
</c>
</row>
</sheetData>
<hyperlinks>
<hyperlink ref="B2" r:id="DeterministicId2" display="Google" />
<hyperlink ref="C2" r:id="DeterministicId1" display="GitHub" />
<hyperlink ref="E2" r:id="DeterministicId3" display="Google Link" />
</hyperlinks>
</worksheet>
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheetData>
<row r="2">
<c r="B2" t="inlineStr">
<is>
<t>Google</t>
</is>
</c>
<c r="C2" t="inlineStr">
<is>
<t>GitHub</t>
</is>
</c>
<c r="E2" t="inlineStr">
<is>
<t>Google Link</t>
</is>
</c>
</row>
</sheetData>
<hyperlinks>
<hyperlink ref="B2" r:id="DeterministicId2" display="Google" />
<hyperlink ref="C2" r:id="DeterministicId1" display="GitHub" />
<hyperlink ref="E2" r:id="DeterministicId3" display="Google Link" />
</hyperlinks>
</worksheet>
116 changes: 116 additions & 0 deletions src/Tests/Patching/SheetPatcherTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,120 @@ public Task PatchWithHyperlinks()
var document = PatchHelper.Patch(new SheetPatcher(relsPatcher), xml, "xl/worksheets/sheet1.xml");
return Verify(document);
}

// Snapshot test: two hyperlink relationships (rId2, rId3) share the target
// https://google.com, while rId1 points at https://github.com.
[Test]
public Task PatchWithDuplicateHyperlinkTargets()
{
// When multiple hyperlinks share the same target URL, the DeterministicId
// assignment must be normalized by cell reference to ensure determinism
// regardless of the original (non-deterministic) rId ordering.
// The rels patcher runs first so its mappings are available to SheetPatcher.
var relsPatcher = new SheetRelationshipPatcher();
var relsXml =
"""
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships
xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship
Id="rId1"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://github.com"
TargetMode="External" />
<Relationship
Id="rId3"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://google.com"
TargetMode="External" />
<Relationship
Id="rId2"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://google.com"
TargetMode="External" />
</Relationships>
""";
PatchHelper.Patch(relsPatcher, relsXml, "xl/worksheets/_rels/sheet1.xml.rels");

// In the sheet below, B2 references rId2 and E2 references rId3.
// After rels renumbering: google.com gets DeterministicId2 and DeterministicId3
// (sorted by target, rId2 and rId3 are interchangeable).
// The normalization should ensure B2 always gets the lower DeterministicId.
var xml =
"""
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheetData>
<row r="2">
<c r="B2" t="inlineStr"><is><t>Google</t></is></c>
<c r="C2" t="inlineStr"><is><t>GitHub</t></is></c>
<c r="E2" t="inlineStr"><is><t>Google Link</t></is></c>
</row>
</sheetData>
<hyperlinks>
<hyperlink ref="B2" r:id="rId2" display="Google" />
<hyperlink ref="C2" r:id="rId1" display="GitHub" />
<hyperlink ref="E2" r:id="rId3" display="Google Link" />
</hyperlinks>
</worksheet>
""";
// The patched sheet is compared against the verified snapshot.
var document = PatchHelper.Patch(new SheetPatcher(relsPatcher), xml, "xl/worksheets/sheet1.xml");
return Verify(document);
}

// Snapshot test: counterpart of PatchWithDuplicateHyperlinkTargets with the two
// google.com relationship ids exchanged in both the rels file and the sheet.
[Test]
public Task PatchWithDuplicateHyperlinkTargets_ReversedIds()
{
// Same scenario as PatchWithDuplicateHyperlinkTargets but with rId2 and rId3
// swapped in the rels file.
// This simulates a different Aspose run where the IDs are assigned differently.
// The output must be identical to PatchWithDuplicateHyperlinkTargets.
var relsPatcher = new SheetRelationshipPatcher();
var relsXml =
"""
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Relationships
xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
<Relationship
Id="rId1"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://github.com"
TargetMode="External" />
<Relationship
Id="rId2"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://google.com"
TargetMode="External" />
<Relationship
Id="rId3"
Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
Target="https://google.com"
TargetMode="External" />
</Relationships>
""";
PatchHelper.Patch(relsPatcher, relsXml, "xl/worksheets/_rels/sheet1.xml.rels");

// In the sheet below, B2 references rId3 and E2 references rId2
// (the opposite of PatchWithDuplicateHyperlinkTargets).
// After normalization, the output should be identical.
var xml =
"""
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<worksheet
xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main"
xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">
<sheetData>
<row r="2">
<c r="B2" t="inlineStr"><is><t>Google</t></is></c>
<c r="C2" t="inlineStr"><is><t>GitHub</t></is></c>
<c r="E2" t="inlineStr"><is><t>Google Link</t></is></c>
</row>
</sheetData>
<hyperlinks>
<hyperlink ref="B2" r:id="rId3" display="Google" />
<hyperlink ref="C2" r:id="rId1" display="GitHub" />
<hyperlink ref="E2" r:id="rId2" display="Google Link" />
</hyperlinks>
</worksheet>
""";
// The patched sheet is compared against the verified snapshot.
var document = PatchHelper.Patch(new SheetPatcher(relsPatcher), xml, "xl/worksheets/sheet1.xml");
return Verify(document);
}
}
Loading