Skip to content

implemented rewrite_image for Document #197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions MuPDF.NET.Test/DocumentTest.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using mupdf;
using System;
using System.IO;
using System.Collections.Generic;
using System.Data.Common;
using System.Linq;
Expand Down Expand Up @@ -142,5 +143,19 @@ public void OpenDocument()
Document doc = new Document("../../../resources/你好.pdf");
Assert.That(doc.PageCount, Is.EqualTo(1));
}

[Test]
public void TestRewriteImages()
{
    // Example for decreasing file size by more than 30%.
    string filePath = "../../../resources/test-rewrite-images.pdf";

    // Size of the untouched input file on disk (no need to load its content).
    long originalSize = new FileInfo(filePath).Length;

    Document doc = new Document(filePath);
    doc.RewriteImage(dpiThreshold: 100, dpiTarget: 72, quality: 33);
    byte[] data = doc.Write(garbage: true, deflate: true);

    // Use floating-point division: with integer division the ratio collapses
    // to 0 or 1 and the 30% threshold would never actually be checked.
    double reduction = 1.0 - ((double)data.Length / originalSize);

    Assert.That(reduction, Is.GreaterThan(0.3));
}
}
}
Binary file added MuPDF.NET.Test/resources/test-rewrite-images.pdf
Binary file not shown.
136 changes: 133 additions & 3 deletions MuPDF.NET/Document.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
using System;
using mupdf;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Linq.Expressions;
using System.Runtime.InteropServices;
using System.Security.Cryptography;
using System.Text;
using mupdf;
using Newtonsoft.Json.Linq;

namespace MuPDF.NET
{
Expand Down Expand Up @@ -1745,6 +1746,135 @@ List<Toc> Recurse(Outline _olItem, List<Toc> list, int _lvl)
return (new List<int>() { pno }, xp, yp);
}

/// <summary>
/// Rewrite images in a PDF document.
/// The typical use case is to reduce the size of the PDF by recompressing
/// images. Default parameters will convert all images to JPEG where
/// possible, using the specified resolutions and quality. Exclude
/// undesired images by setting the corresponding parameters to false.
/// </summary>
/// <param name="dpiThreshold">Look at images with a larger DPI only.
/// A value of 0 or less (the default) resets both DPI values to 0.</param>
/// <param name="dpiTarget">Change eligible images to this DPI. Must be less than
/// <paramref name="dpiThreshold"/> when positive.</param>
/// <param name="quality">Quality of the recompressed images (0-100).</param>
/// <param name="lossy">Process lossy image types (e.g. JPEG).</param>
/// <param name="lossless">Process lossless image types (e.g. PNG).</param>
/// <param name="bitonal">Process black-and-white images (e.g. FAX).</param>
/// <param name="color">Process colored images.</param>
/// <param name="gray">Process gray images.</param>
/// <param name="setToGray">Whether to change the PDF to gray at process start.</param>
/// <param name="options">Custom options for image rewriting (optional).
/// Expert use only. If provided, other parameters are ignored, except
/// <paramref name="setToGray"/> (the DPI consistency check is still performed).</param>
/// <exception cref="ArgumentException">
/// <paramref name="dpiTarget"/> is positive and not less than <paramref name="dpiThreshold"/>,
/// or <paramref name="options"/> exposes members unknown to <c>PdfImageRewriterOptions</c>.
/// </exception>
public void RewriteImage(
    int dpiThreshold = -1,
    int dpiTarget = 0,
    int quality = 0,
    bool lossy = true,
    bool lossless = true,
    bool bitonal = true,
    bool color = true,
    bool gray = true,
    bool setToGray = false,
    PdfImageRewriterOptions options = null
    )
{
    // The quality is handed to the options object as text; format it culture-independently.
    string qualityStr = quality.ToString(System.Globalization.CultureInfo.InvariantCulture);

    // No usable threshold (the -1 default or 0) or a negative target: reset both
    // values to 0 instead of passing the sentinel on to MuPDF.
    // NOTE(review): presumably MuPDF treats 0 as "no subsampling" - confirm.
    if (dpiThreshold <= 0 || dpiTarget < 0)
    {
        dpiThreshold = 0;
        dpiTarget = 0;
    }
    if (dpiTarget > 0 && dpiTarget >= dpiThreshold)
    {
        throw new ArgumentException($"dpi_target={dpiTarget} must be less than dpi_threshold={dpiThreshold}");
    }

    PdfImageRewriterOptions opts;
    if (options != null)
    {
        // Caller-supplied options: reject objects exposing members that the
        // plain PdfImageRewriterOptions type does not declare.
        var knownMembers = new HashSet<string>(
            typeof(PdfImageRewriterOptions).GetMembers().Select(m => m.Name));
        List<string> invalidOptions = options.GetType().GetMembers()
            .Select(m => m.Name)
            .Where(name => !knownMembers.Contains(name))
            .Distinct()
            .ToList();
        if (invalidOptions.Count > 0)
        {
            throw new ArgumentException($"Invalid options: {string.Join(", ", invalidOptions)}");
        }

        opts = options;
    }
    else
    {
        opts = new PdfImageRewriterOptions();
        if (bitonal)
        {
            opts.bitonal_image_recompress_method = mupdf.mupdf.FZ_RECOMPRESS_FAX;
            opts.bitonal_image_subsample_method = mupdf.mupdf.FZ_SUBSAMPLE_AVERAGE;
            opts.bitonal_image_subsample_to = dpiTarget;
            opts.bitonal_image_recompress_quality = qualityStr;
            opts.bitonal_image_subsample_threshold = dpiThreshold;
        }
        if (color)
        {
            if (lossless)
            {
                opts.color_lossless_image_recompress_method = mupdf.mupdf.FZ_RECOMPRESS_JPEG;
                opts.color_lossless_image_subsample_method = mupdf.mupdf.FZ_SUBSAMPLE_AVERAGE;
                opts.color_lossless_image_subsample_to = dpiTarget;
                opts.color_lossless_image_subsample_threshold = dpiThreshold;
                opts.color_lossless_image_recompress_quality = qualityStr;
            }
            if (lossy)
            {
                opts.color_lossy_image_recompress_method = mupdf.mupdf.FZ_RECOMPRESS_JPEG;
                opts.color_lossy_image_subsample_method = mupdf.mupdf.FZ_SUBSAMPLE_AVERAGE;
                opts.color_lossy_image_subsample_threshold = dpiThreshold;
                opts.color_lossy_image_subsample_to = dpiTarget;
                opts.color_lossy_image_recompress_quality = qualityStr;
            }
        }
        if (gray)
        {
            if (lossless)
            {
                opts.gray_lossless_image_recompress_method = mupdf.mupdf.FZ_RECOMPRESS_JPEG;
                opts.gray_lossless_image_subsample_method = mupdf.mupdf.FZ_SUBSAMPLE_AVERAGE;
                opts.gray_lossless_image_subsample_to = dpiTarget;
                opts.gray_lossless_image_subsample_threshold = dpiThreshold;
                opts.gray_lossless_image_recompress_quality = qualityStr;
            }
            if (lossy)
            {
                opts.gray_lossy_image_recompress_method = mupdf.mupdf.FZ_RECOMPRESS_JPEG;
                opts.gray_lossy_image_subsample_method = mupdf.mupdf.FZ_SUBSAMPLE_AVERAGE;
                opts.gray_lossy_image_subsample_threshold = dpiThreshold;
                opts.gray_lossy_image_subsample_to = dpiTarget;
                opts.gray_lossy_image_recompress_quality = qualityStr;
            }
        }
    }

    // Optionally convert all pages to a single color component before rewriting.
    if (setToGray)
    {
        this.Recolor(1);
    }

    PdfDocument pdf = Document.AsPdfDocument(_nativeDocument);
    mupdf.mupdf.pdf_rewrite_images(pdf, opts);
}

/// <summary>
/// Apply the requested color component count to every page of the document.
/// </summary>
/// <param name="components">Desired color component count, one of 1, 3, 4.</param>
/// <exception cref="ArgumentException">The document is not a PDF.</exception>
public void Recolor(int components=1)
{
    if (!IsPDF)
    {
        throw new ArgumentException("is no PDF");
    }

    // Visit the pages in order, recoloring each one as it is loaded.
    int pageIndex = 0;
    while (pageIndex < PageCount)
    {
        var page = LoadPage(pageIndex);
        page.Recolor(components);
        pageIndex++;
    }
}

/// <summary>
/// Return string version of a PDF object definition.
/// </summary>
Expand Down
136 changes: 80 additions & 56 deletions MuPDF.NET/Widget.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
using System.Collections.Generic;
using System.Data;
using System.Linq;
using System.Security.Cryptography;

namespace MuPDF.NET
{
Expand Down Expand Up @@ -210,6 +209,10 @@ public override string ToString()
return $"Widget:(field_type={FieldTypeString}) script={Script}";
}

/// <summary>
/// Ensure text_font is from our list and correctly spelled.
/// </summary>
/// <returns></returns>
public void AdjustFont()
{
if (string.IsNullOrEmpty(TextFont))
Expand All @@ -228,6 +231,10 @@ public void AdjustFont()
return;
}

/// <summary>
/// Any widget type checks.
/// </summary>
/// <returns></returns>
public void Checker()
{
if (!(FieldType >= 1 && FieldType < 8))
Expand All @@ -250,14 +257,19 @@ public void Checker()
}
}

/// <summary>
/// Extract font name, size and color from default appearance string (/DA object).
/// Equivalent to 'pdf_parse_default_appearance' function in MuPDF's 'pdf-annot.c'.
/// </summary>
/// <returns></returns>
public void ParseDa()
{
if (string.IsNullOrEmpty(TextDa))
return;
string font = "Helv";
float fontSize = 0;
float[] col = { 0, 0, 0 };
string[] dat = TextDa.Split(' ');
string[] dat = TextDa.Split(' '); // split on any whitespace
for (int i = 0; i < dat.Length; i++)
{
string item = dat[i];
Expand Down Expand Up @@ -289,6 +301,10 @@ public void ParseDa()
TextDa = "";
}

/// <summary>
/// Validate the class entries.
/// </summary>
/// <returns></returns>
public void Validate()
{
if (Rect.IsInfinite || Rect.IsEmpty)
Expand All @@ -310,16 +326,21 @@ public void Validate()

BorderStyle = BorderStyle.ToUpper().Substring(0, 1);

// standardize content of JavaScript entries
bool btnType = (new List<PdfWidgetType> {
PdfWidgetType.PDF_WIDGET_TYPE_BUTTON,
PdfWidgetType.PDF_WIDGET_TYPE_CHECKBOX,
PdfWidgetType.PDF_WIDGET_TYPE_RADIOBUTTON}).Contains((PdfWidgetType)FieldType);
if (string.IsNullOrEmpty(Script))
Script = null;

// buttons cannot have the following script actions
if (btnType || string.IsNullOrEmpty(ScriptCalc))
ScriptCalc = null;

if (btnType || string.IsNullOrEmpty(ScriptChange))
ScriptChange = null;

if (btnType || string.IsNullOrEmpty(ScriptFormat))
ScriptFormat = null;

Expand All @@ -332,18 +353,72 @@ public void Validate()
if (btnType || string.IsNullOrEmpty(ScriptFocus))
ScriptFocus = null;

Checker();
Checker(); // any field_type specific checks
}

/// <summary>
/// Propagate the field flags.
/// Writes this widget's field flags into its "/Parent" object and into every
/// other widget listed in that parent's "/Kids" array.
/// Only possible for widgets existing in the PDF (non-zero xref).
/// </summary>
/// <returns>
/// true when the flags were written; false when the widget has no xref, no owning
/// document, no "/Parent" dictionary, or the parent has no "/Kids" array.
/// </returns>
public bool SyncFlags()
{
    // A zero xref means the widget is not stored in the PDF.
    if (Xref == 0)
    {
        return false;
    }

    Document owner = this.Parent.Parent; // the owning document
    if (owner == null)
    {
        return false;
    }

    // Load the PDF object behind this widget and look up its /Parent entry.
    PdfObj widgetObj = Document.AsPdfDocument(owner).pdf_load_object(Xref);
    PdfObj parentField = widgetObj.pdf_dict_get(new PdfObj("Parent"));
    if (parentField.pdf_is_dict() == 0)
    {
        return false; // no /Parent: nothing to do
    }

    // The parent receives the flags first ...
    parentField.pdf_dict_put_int(new PdfObj("Ff"), this.FieldFlags);

    // ... followed by all of its kids.
    PdfObj kidArray = parentField.pdf_dict_get(new PdfObj("Kids"));
    if (kidArray.pdf_is_array() == 0)
    {
        Console.WriteLine("warning: malformed PDF, Parent has no Kids array");
        return false; // no /Kids: should never happen!
    }

    int index = 0;
    while (index < kidArray.pdf_array_len())
    {
        PdfObj child = kidArray.pdf_array_get(index);
        index++;

        // Precautionary checks, evaluated in order: must be a dictionary,
        // must not be this widget itself, and must have /Subtype "Widget".
        bool isOtherWidget = child.pdf_is_dict() != 0
            && child.pdf_to_num() != this.Xref
            && child.pdf_dict_get(new PdfObj("Subtype")).pdf_to_name() == "Widget";
        if (isOtherWidget)
        {
            child.pdf_dict_put_int(new PdfObj("Ff"), this.FieldFlags);
        }
    }

    return true; // all done
}

/// <summary>
/// Return the names of On / Off (i.e. selected / clicked or not) states a button field may have. While the ‘Off’ state usually is also named like so, the ‘On’ state is often given a name relating to the functional context, for example ‘Yes’, ‘Female’, etc.
/// A button may have 'normal' or 'pressed down' appearances. While the 'Off'
/// state is usually called like this, the 'On' state is often given a name
/// relating to the functional context.
/// </summary>
/// <returns></returns>
public Dictionary<string, List<string>> ButtonStates()
{
if (!(FieldType == 2 || FieldType == 5))
return null;
Document doc = this.Parent.Parent;
return null; // no button type
Document doc = this.Parent.Parent; // field already exists on page
if (doc == null)
return null;

Expand Down Expand Up @@ -438,57 +513,6 @@ public void Reset()
Utils.ResetWidget(_annot);
}

/// <summary>
/// Propagate the field flags.
/// Writes this widget's field flags into its "/Parent" object and into every
/// other widget listed in that parent's "/Kids" array.
/// Only possible for widgets existing in the PDF (non-zero xref).
/// </summary>
/// <returns>
/// true when the flags were written; false when the widget has no xref, no owning
/// document, no "/Parent" dictionary, or the parent has no "/Kids" array.
/// </returns>
public bool SyncFlags()
{
    // A zero xref means the widget is not stored in the PDF.
    if (Xref == 0)
    {
        return false;
    }

    Document owner = this.Parent.Parent; // the owning document
    if (owner == null)
    {
        return false;
    }

    // Load the PDF object behind this widget and look up its /Parent entry.
    PdfObj widgetObj = Document.AsPdfDocument(owner).pdf_load_object(Xref);
    PdfObj parentField = widgetObj.pdf_dict_get(new PdfObj("Parent"));
    if (parentField.pdf_is_dict() == 0)
    {
        return false; // no /Parent: nothing to do
    }

    // The parent receives the flags first ...
    parentField.pdf_dict_put_int(new PdfObj("Ff"), this.FieldFlags);

    // ... followed by all of its kids.
    PdfObj kidArray = parentField.pdf_dict_get(new PdfObj("Kids"));
    if (kidArray.pdf_is_array() == 0)
    {
        Console.WriteLine("warning: malformed PDF, Parent has no Kids array");
        return false; // no /Kids: should never happen!
    }

    int index = 0;
    while (index < kidArray.pdf_array_len())
    {
        PdfObj child = kidArray.pdf_array_get(index);
        index++;

        // Precautionary checks, evaluated in order: must be a dictionary,
        // must not be this widget itself, and must have /Subtype "Widget".
        bool isOtherWidget = child.pdf_is_dict() != 0
            && child.pdf_to_num() != this.Xref
            && child.pdf_dict_get(new PdfObj("Subtype")).pdf_to_name() == "Widget";
        if (isOtherWidget)
        {
            child.pdf_dict_put_int(new PdfObj("Ff"), this.FieldFlags);
        }
    }

    return true; // all done
}

/// <summary>
/// After any changes to a widget, this method must be used to store them in the PDF
/// <param name="syncFlags">propagate field flags to parent and kids</param>
Expand Down