From 39f8ad3b35e14cfcabf3d5d916899f7ac78790f7 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Sun, 16 Jun 2024 11:04:13 +0200 Subject: [PATCH] Add better subsetting. (#76) --- Cargo.lock | 5 ++--- Cargo.toml | 2 +- src/render/text.rs | 53 +++++++++++++++++++++++----------------------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 209c6ab6..320adb4d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1540,9 +1540,8 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "subsetter" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09eab8a83bff89ba2200bd4c59be45c7c787f988431b936099a5a266c957f2f9" +version = "0.11.0" +source = "git+https://github.com/typst/subsetter?rev=4e0058b#4e0058b4b9a0948a5f79894111948d95e59ba350" [[package]] name = "svg2pdf" diff --git a/Cargo.toml b/Cargo.toml index 3c4aa4bd..ff0b1470 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ usvg = { version = "0.42.0", default-features = false } tiny-skia = "0.11.4" unicode-properties = "0.1.1" resvg = { version = "0.42.0", default-features = false } -subsetter = "0.1.1" +subsetter = {git = "https://github.com/typst/subsetter", rev = "4e0058b"} ttf-parser = { version = "0.21.1" } siphasher = { version = "1.0.1"} diff --git a/src/render/text.rs b/src/render/text.rs index d6b3e85a..6626287b 100644 --- a/src/render/text.rs +++ b/src/render/text.rs @@ -13,6 +13,7 @@ use siphasher::sip128::{Hasher128, SipHasher13}; use std::collections::{BTreeMap, HashMap}; use std::hash::Hash; use std::sync::Arc; +use subsetter::GlyphRemapper; use ttf_parser::{name_id, Face, GlyphId, PlatformId, Tag}; use unicode_properties::{GeneralCategory, UnicodeGeneralCategory}; use usvg::{Fill, Group, ImageKind, Node, PaintOrder, Stroke, Transform}; @@ -45,6 +46,7 @@ pub fn write_font( let data_ref = alloc.alloc_ref(); let glyph_set = &mut font.glyph_set; + let glyph_remapper = &font.glyph_remapper; // Do we have a TrueType or CFF font? // @@ -83,14 +85,10 @@ pub fn write_font( } let mut widths = vec![]; - for gid in std::iter::once(0).chain(glyph_set.keys().copied()) { - let width = ttf.glyph_hor_advance(GlyphId(gid)).unwrap_or(0); + for old_gid in glyph_remapper.remapped_gids() { + let width = ttf.glyph_hor_advance(GlyphId(old_gid)).unwrap_or(0); let units = (width as f64 / units_per_em as f64) * 1000.0; - let cid = glyph_cid(&ttf, gid); - if usize::from(cid) >= widths.len() { - widths.resize(usize::from(cid) + 1, 0.0); - widths[usize::from(cid)] = units as f32; - } + widths.push(units as f32); } // Write all non-zero glyph widths. @@ -156,12 +154,12 @@ pub fn write_font( font_descriptor.finish(); - let cmap = create_cmap(&ttf, glyph_set); + let cmap = + create_cmap(&ttf, glyph_set, glyph_remapper).ok_or(SubsetError(font.id))?; chunk.cmap(cmap_ref, &cmap.finish()); // Subset and write the font's bytes. - let glyphs: Vec<_> = glyph_set.keys().copied().collect(); - let data = subset_font(&font.face_data, font.face_index, &glyphs, font.id)?; + let data = subset_font(&font.face_data, font.face_index, glyph_remapper, font.id)?; let mut stream = chunk.stream(data_ref, &data); stream.filter(Filter::FlateDecode); @@ -174,7 +172,11 @@ pub fn write_font( } /// Create a /ToUnicode CMap. -fn create_cmap(ttf: &Face, glyph_set: &mut BTreeMap) -> UnicodeCmap { +fn create_cmap( + ttf: &Face, + glyph_set: &mut BTreeMap, + glyph_remapper: &GlyphRemapper, +) -> Option { // For glyphs that have codepoints mapping to them in the font's cmap table, // we prefer them over pre-existing text mappings from the document. Only // things that don't have a corresponding codepoint (or only a private-use @@ -201,24 +203,25 @@ fn create_cmap(ttf: &Face, glyph_set: &mut BTreeMap) -> UnicodeCmap // Produce a reverse mapping from glyphs' CIDs to unicode strings. let mut cmap = UnicodeCmap::new(CMAP_NAME, SYSTEM_INFO); for (&g, text) in glyph_set.iter() { + let new_gid = glyph_remapper.get(g)?; if !text.is_empty() { - cmap.pair_with_multiple(glyph_cid(ttf, g), text.chars()); + cmap.pair_with_multiple(new_gid, text.chars()); } } - cmap + Some(cmap) } fn subset_font( font_data: &[u8], index: u32, - glyphs: &[u16], + glyph_remapper: &GlyphRemapper, id: fontdb::ID, ) -> Result> { let data = font_data; - let profile = subsetter::Profile::pdf(glyphs); - let subsetted = subsetter::subset(data, index, profile); - let mut data = subsetted.as_deref().unwrap_or(data); + let subsetted = + subsetter::subset(data, index, glyph_remapper).map_err(|_| SubsetError(id))?; + let mut data = subsetted.as_ref(); // Extract the standalone CFF font program if applicable. let face = ttf_parser::RawFace::parse(data, 0).map_err(|_| SubsetError(id))?; @@ -265,7 +268,8 @@ pub fn render( let name = font_names.get(&font.reference).unwrap(); - let gid = glyph.id.0; + // TODO: Remove unwraps and switch to error-based handling. + let cid = font.glyph_remapper.get(glyph.id.0).unwrap(); let ts = glyph .outline_transform() .pre_scale(font.units_per_em as f32, font.units_per_em as f32) @@ -277,7 +281,7 @@ pub fn render( content.begin_text(); content.set_text_matrix(ts.to_pdf_transform()); content.set_font(Name(name.as_bytes()), span.font_size.get()); - content.show(Str(&[(gid >> 8) as u8, (gid & 0xff) as u8])); + content.show(Str(&[(cid >> 8) as u8, (cid & 0xff) as u8])); content.end_text(); content.restore_state(); } @@ -451,13 +455,6 @@ fn decode_mac_roman(coded: &[u8]) -> String { coded.iter().copied().map(char_from_mac_roman).collect() } -fn glyph_cid(ttf: &Face, glyph_id: u16) -> u16 { - ttf.tables() - .cff - .and_then(|cff| cff.glyph_cid(GlyphId(glyph_id))) - .unwrap_or(glyph_id) -} - /// Extra methods for [`[T]`](slice). pub trait SliceExt { /// Split a slice into consecutive runs with the same key and yield for @@ -501,6 +498,7 @@ where pub struct Font { pub id: fontdb::ID, pub glyph_set: BTreeMap, + pub glyph_remapper: GlyphRemapper, pub reference: Ref, pub face_data: Arc>, pub units_per_em: u16, @@ -525,12 +523,14 @@ pub fn fill_fonts(group: &Group, ctx: &mut Context, fontdb: &fontdb::Database) { { let reference = allocator.alloc_ref(); let glyph_set = BTreeMap::new(); + let glyph_remapper = GlyphRemapper::new(); return Some(Font { id: g.font, reference, face_data: Arc::new(Vec::from(data)), units_per_em: ttf.units_per_em(), glyph_set, + glyph_remapper, face_index, }); } @@ -542,6 +542,7 @@ pub fn fill_fonts(group: &Group, ctx: &mut Context, fontdb: &fontdb::Database) { if let Some(ref mut font) = font { font.glyph_set.insert(g.id.0, g.text.clone()); + font.glyph_remapper.remap(g.id.0); } } }