Skip to content

Commit 43d5d4e

Browse files
authored
Merge pull request matplotlib#30566 from QuLogic/full-charmap-tracking
pdf/ps: Track full character map in CharacterTracker
2 parents d56936b + f192c87 commit 43d5d4e

File tree

3 files changed

+123
-50
lines changed

3 files changed

+123
-50
lines changed

lib/matplotlib/backends/_backend_pdf_ps.py

Lines changed: 94 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
if typing.TYPE_CHECKING:
21-
from .ft2font import FT2Font, GlyphIndexType
21+
from .ft2font import CharacterCodeType, FT2Font, GlyphIndexType
2222
from fontTools.ttLib import TTFont
2323

2424

@@ -107,23 +107,103 @@ class CharacterTracker:
107107
"""
108108
Helper for font subsetting by the PDF and PS backends.
109109
110-
Maintains a mapping of font paths to the set of glyphs that are being used from that
111-
font.
112-
"""
110+
Maintains a mapping of font paths to the set of characters and glyphs that are being
111+
used from that font.
112+
113+
Attributes
114+
----------
115+
subset_size : int
116+
The size at which characters are grouped into subsets.
117+
used : dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]]
118+
A dictionary of font files to character maps.
119+
120+
The key is a tuple of a font filename and a subset index within that font.
113121
114-
def __init__(self) -> None:
115-
self.used: dict[str, set[GlyphIndexType]] = {}
122+
The value is a dictionary mapping a character code to a glyph index. Note this
123+
mapping runs in the same direction as FT2Font.get_charmap (code to glyph index).
116124
117-
def track(self, font: FT2Font, s: str) -> None:
118-
"""Record that string *s* is being typeset using font *font*."""
125+
If *subset_size* is not set, then there will only be one subset per font
126+
filename.
127+
"""
128+
129+
def __init__(self, subset_size: int = 0):
130+
"""
131+
Parameters
132+
----------
133+
subset_size : int, optional
134+
The maximum size that is supported for an embedded font. If provided, then
135+
characters will be grouped into subsets of this size.
136+
"""
137+
self.used: dict[tuple[str, int], dict[CharacterCodeType, GlyphIndexType]] = {}
138+
self.subset_size = subset_size
139+
140+
def track(self, font: FT2Font, s: str) -> list[tuple[int, CharacterCodeType]]:
141+
"""
142+
Record that string *s* is being typeset using font *font*.
143+
144+
Parameters
145+
----------
146+
font : FT2Font
147+
A font that is being used for the provided string.
148+
s : str
149+
The string that should be marked as tracked by the provided font.
150+
151+
Returns
152+
-------
153+
list[tuple[int, CharacterCodeType]]
154+
A list of subset and character code pairs corresponding to the input string.
155+
If a *subset_size* is specified on this instance, then the character code
156+
will correspond with the given subset (and not necessarily the string as a
157+
whole). If *subset_size* is not specified, then the subset will always be 0
158+
and the character codes will be returned from the string unchanged.
159+
"""
160+
font_glyphs = []
119161
char_to_font = font._get_fontmap(s)
120162
for _c, _f in char_to_font.items():
121-
glyph_index = _f.get_char_index(ord(_c))
122-
self.used.setdefault(_f.fname, set()).add(glyph_index)
123-
124-
def track_glyph(self, font: FT2Font, glyph_index: GlyphIndexType) -> None:
125-
"""Record that glyph index *glyph_index* is being typeset using font *font*."""
126-
self.used.setdefault(font.fname, set()).add(glyph_index)
163+
charcode = ord(_c)
164+
glyph_index = _f.get_char_index(charcode)
165+
if self.subset_size != 0:
166+
subset = charcode // self.subset_size
167+
subset_charcode = charcode % self.subset_size
168+
else:
169+
subset = 0
170+
subset_charcode = charcode
171+
self.used.setdefault((_f.fname, subset), {})[subset_charcode] = glyph_index
172+
font_glyphs.append((subset, subset_charcode))
173+
return font_glyphs
174+
175+
def track_glyph(
176+
self, font: FT2Font, charcode: CharacterCodeType,
177+
glyph: GlyphIndexType) -> tuple[int, CharacterCodeType]:
178+
"""
179+
Record character code *charcode* at glyph index *glyph* as using font *font*.
180+
181+
Parameters
182+
----------
183+
font : FT2Font
184+
A font that is being used for the provided character code.
185+
charcode : CharacterCodeType
186+
The character code to record.
187+
glyph : GlyphIndexType
188+
The corresponding glyph index to record.
189+
190+
Returns
191+
-------
192+
subset : int
193+
The subset in which the returned character code resides. If *subset_size*
194+
was not specified on this instance, then this is always 0.
195+
subset_charcode : CharacterCodeType
196+
The character code within the above subset. If *subset_size* was not
197+
specified on this instance, then this is just *charcode* unmodified.
198+
"""
199+
if self.subset_size != 0:
200+
subset = charcode // self.subset_size
201+
subset_charcode = charcode % self.subset_size
202+
else:
203+
subset = 0
204+
subset_charcode = charcode
205+
self.used.setdefault((font.fname, subset), {})[subset_charcode] = glyph
206+
return (subset, subset_charcode)
127207

128208

129209
class RendererPDFPSBase(RendererBase):

lib/matplotlib/backends/backend_pdf.py

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import sys
2020
import time
2121
import types
22-
import typing
2322
import warnings
2423
import zlib
2524

@@ -36,8 +35,7 @@
3635
from matplotlib.figure import Figure
3736
from matplotlib.font_manager import get_font, fontManager as _fontManager
3837
from matplotlib._afm import AFM
39-
from matplotlib.ft2font import (
40-
FT2Font, FaceFlags, GlyphIndexType, Kerning, LoadFlags, StyleFlags)
38+
from matplotlib.ft2font import FT2Font, FaceFlags, Kerning, LoadFlags, StyleFlags
4139
from matplotlib.transforms import Affine2D, BboxBase
4240
from matplotlib.path import Path
4341
from matplotlib.dates import UTC
@@ -962,9 +960,9 @@ def writeFonts(self):
962960
else:
963961
# a normal TrueType font
964962
_log.debug('Writing TrueType font.')
965-
glyphs = self._character_tracker.used.get(filename)
966-
if glyphs:
967-
fonts[Fx] = self.embedTTF(filename, glyphs)
963+
charmap = self._character_tracker.used.get((filename, 0))
964+
if charmap:
965+
fonts[Fx] = self.embedTTF(filename, charmap)
968966
self.writeObject(self.fontObject, fonts)
969967

970968
def _write_afm_font(self, filename):
@@ -1006,8 +1004,9 @@ def _embedTeXFont(self, dvifont):
10061004

10071005
# Reduce the font to only the glyphs used in the document, get the encoding
10081006
# for that subset, and compute various properties based on the encoding.
1009-
chars = frozenset(self._character_tracker.used[dvifont.fname])
1010-
t1font = t1font.subset(chars, self._get_subset_prefix(chars))
1007+
charmap = self._character_tracker.used[(dvifont.fname, 0)]
1008+
chars = frozenset(charmap.keys())
1009+
t1font = t1font.subset(chars, self._get_subset_prefix(charmap.values()))
10111010
fontdict['BaseFont'] = Name(t1font.prop['FontName'])
10121011
# createType1Descriptor writes the font data as a side effect
10131012
fontdict['FontDescriptor'] = self.createType1Descriptor(t1font)
@@ -1138,7 +1137,7 @@ def _get_xobject_glyph_name(self, filename, glyph_name):
11381137
end
11391138
end"""
11401139

1141-
def embedTTF(self, filename, glyphs):
1140+
def embedTTF(self, filename, charmap):
11421141
"""Embed the TTF font from the named file into the document."""
11431142
font = get_font(filename)
11441143
fonttype = mpl.rcParams['pdf.fonttype']
@@ -1154,7 +1153,7 @@ def cvt(length, upe=font.units_per_EM, nearest=True):
11541153
else:
11551154
return math.ceil(value)
11561155

1157-
def embedTTFType3(font, glyphs, descriptor):
1156+
def embedTTFType3(font, charmap, descriptor):
11581157
"""The Type 3-specific part of embedding a Truetype font"""
11591158
widthsObject = self.reserveObject('font widths')
11601159
fontdescObject = self.reserveObject('font descriptor')
@@ -1201,10 +1200,8 @@ def get_char_width(charcode):
12011200
# that we need from this font.
12021201
differences = []
12031202
multi_byte_chars = set()
1204-
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1205-
for gind in glyphs:
1203+
for ccode, gind in charmap.items():
12061204
glyph_name = font.get_glyph_name(gind)
1207-
ccode = charmap.get(gind)
12081205
if ccode is not None and ccode <= 255:
12091206
differences.append((ccode, glyph_name))
12101207
else:
@@ -1219,7 +1216,7 @@ def get_char_width(charcode):
12191216
last_c = c
12201217

12211218
# Make the charprocs array.
1222-
rawcharprocs = _get_pdf_charprocs(filename, glyphs)
1219+
rawcharprocs = _get_pdf_charprocs(filename, charmap.values())
12231220
charprocs = {}
12241221
for charname in sorted(rawcharprocs):
12251222
stream = rawcharprocs[charname]
@@ -1256,7 +1253,7 @@ def get_char_width(charcode):
12561253

12571254
return fontdictObject
12581255

1259-
def embedTTFType42(font, glyphs, descriptor):
1256+
def embedTTFType42(font, charmap, descriptor):
12601257
"""The Type 42-specific part of embedding a Truetype font"""
12611258
fontdescObject = self.reserveObject('font descriptor')
12621259
cidFontDictObject = self.reserveObject('CID font dictionary')
@@ -1266,8 +1263,9 @@ def embedTTFType42(font, glyphs, descriptor):
12661263
wObject = self.reserveObject('Type 0 widths')
12671264
toUnicodeMapObject = self.reserveObject('ToUnicode map')
12681265

1269-
_log.debug("SUBSET %s characters: %s", filename, glyphs)
1270-
with _backend_pdf_ps.get_glyphs_subset(filename, glyphs) as subset:
1266+
_log.debug("SUBSET %s characters: %s", filename, charmap)
1267+
with _backend_pdf_ps.get_glyphs_subset(filename,
1268+
charmap.values()) as subset:
12711269
fontdata = _backend_pdf_ps.font_as_file(subset)
12721270
_log.debug(
12731271
"SUBSET %s %d -> %d", filename,
@@ -1315,11 +1313,9 @@ def embedTTFType42(font, glyphs, descriptor):
13151313
cid_to_gid_map = ['\0'] * 65536
13161314
widths = []
13171315
max_ccode = 0
1318-
charmap = {gind: ccode for ccode, gind in font.get_charmap().items()}
1319-
for gind in glyphs:
1316+
for ccode, gind in charmap.items():
13201317
glyph = font.load_glyph(gind,
13211318
flags=LoadFlags.NO_SCALE | LoadFlags.NO_HINTING)
1322-
ccode = charmap[gind]
13231319
widths.append((ccode, cvt(glyph.horiAdvance)))
13241320
if ccode < 65536:
13251321
cid_to_gid_map[ccode] = chr(gind)
@@ -1358,8 +1354,8 @@ def embedTTFType42(font, glyphs, descriptor):
13581354

13591355
# Add XObjects for unsupported chars
13601356
glyph_indices = [
1361-
glyph_index for glyph_index in glyphs
1362-
if not _font_supports_glyph(fonttype, charmap[glyph_index])
1357+
glyph_index for ccode, glyph_index in charmap.items()
1358+
if not _font_supports_glyph(fonttype, ccode)
13631359
]
13641360

13651361
bbox = [cvt(x, nearest=False) for x in full_font.bbox]
@@ -1445,9 +1441,9 @@ def embedTTFType42(font, glyphs, descriptor):
14451441
}
14461442

14471443
if fonttype == 3:
1448-
return embedTTFType3(font, glyphs, descriptor)
1444+
return embedTTFType3(font, charmap, descriptor)
14491445
elif fonttype == 42:
1450-
return embedTTFType42(font, glyphs, descriptor)
1446+
return embedTTFType42(font, charmap, descriptor)
14511447

14521448
def alphaState(self, alpha):
14531449
"""Return name of an ExtGState that sets alpha to the given value."""
@@ -2212,7 +2208,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
22122208

22132209
self.file.output(Op.begin_text)
22142210
for font, fontsize, ccode, glyph_index, ox, oy in glyphs:
2215-
self.file._character_tracker.track_glyph(font, glyph_index)
2211+
self.file._character_tracker.track_glyph(font, ccode, glyph_index)
22162212
fontname = font.fname
22172213
if not _font_supports_glyph(fonttype, ccode):
22182214
# Unsupported chars (i.e. multibyte in Type 3 or beyond BMP in
@@ -2268,11 +2264,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, *, mtext=None):
22682264
seq += [['font', pdfname, text.font.size]]
22692265
oldfont = text.font
22702266
seq += [['text', text.x, text.y, [bytes([text.glyph])], text.x+text.width]]
2271-
# TODO: This should use glyph indices, not character codes, but will be
2272-
# fixed soon.
2273-
self.file._character_tracker.track_glyph(text.font,
2274-
typing.cast('GlyphIndexType',
2275-
text.glyph))
2267+
self.file._character_tracker.track_glyph(text.font, text.glyph, text.index)
22762268

22772269
# Find consecutive text strings with constant y coordinate and
22782270
# combine into a sequence of strings and kerns, or just one

lib/matplotlib/backends/backend_ps.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -826,7 +826,7 @@ def draw_mathtext(self, gc, x, y, s, prop, angle):
826826
f"{angle:g} rotate\n")
827827
lastfont = None
828828
for font, fontsize, ccode, glyph_index, ox, oy in glyphs:
829-
self._character_tracker.track_glyph(font, glyph_index)
829+
self._character_tracker.track_glyph(font, ccode, glyph_index)
830830
if (font.postscript_name, fontsize) != lastfont:
831831
lastfont = font.postscript_name, fontsize
832832
self._pswriter.write(
@@ -1069,18 +1069,19 @@ def print_figure_impl(fh):
10691069
print("mpldict begin", file=fh)
10701070
print("\n".join(_psDefs), file=fh)
10711071
if not mpl.rcParams['ps.useafm']:
1072-
for font_path, glyphs in ps_renderer._character_tracker.used.items():
1073-
if not glyphs:
1072+
for (font, subset_index), charmap in \
1073+
ps_renderer._character_tracker.used.items():
1074+
if not charmap:
10741075
continue
10751076
fonttype = mpl.rcParams['ps.fonttype']
10761077
# Can't use more than 255 chars from a single Type 3 font.
1077-
if len(glyphs) > 255:
1078+
if len(charmap) > 255:
10781079
fonttype = 42
10791080
fh.flush()
10801081
if fonttype == 3:
1081-
fh.write(_font_to_ps_type3(font_path, glyphs))
1082+
fh.write(_font_to_ps_type3(font, charmap.values()))
10821083
else: # Type 42 only.
1083-
_font_to_ps_type42(font_path, glyphs, fh)
1084+
_font_to_ps_type42(font, charmap.values(), fh)
10841085
print("end", file=fh)
10851086
print("%%EndProlog", file=fh)
10861087

0 commit comments

Comments
 (0)