diff --git a/src/core/fonts.js b/src/core/fonts.js index 78f1d66e046bf..76b4eac1ec271 100644 --- a/src/core/fonts.js +++ b/src/core/fonts.js @@ -4300,6 +4300,7 @@ var Font = (function FontClosure() { if (!properties.composite /* is simple font */) { toUnicode = []; var encoding = properties.defaultEncoding.slice(); + var baseEncodingName = properties.baseEncodingName; // Merge in the differences array. var differences = properties.differences; for (charcode in differences) { @@ -4310,26 +4311,43 @@ var Font = (function FontClosure() { var glyphName = encoding[charcode]; // b) Look up the character name in the Adobe Glyph List (see the // Bibliography) to obtain the corresponding Unicode value. - if (glyphName === '' || !(glyphName in GlyphsUnicode)) { + if (glyphName === '') { + continue; + } else if (GlyphsUnicode[glyphName] === undefined) { // (undocumented) c) Few heuristics to recognize unknown glyphs // NOTE: Adobe Reader does not do this step, but OSX Preview does - var code; - // Gxx glyph - if (glyphName.length === 3 && - glyphName[0] === 'G' && - (code = parseInt(glyphName.substr(1), 16))) { - toUnicode[charcode] = String.fromCharCode(code); - } - // g00xx glyph - if (glyphName.length === 5 && - glyphName[0] === 'g' && - (code = parseInt(glyphName.substr(1), 16))) { - toUnicode[charcode] = String.fromCharCode(code); + var code = 0; + switch (glyphName[0]) { + case 'G': // Gxx glyph + if (glyphName.length === 3) { + code = parseInt(glyphName.substr(1), 16); + } + break; + case 'g': // g00xx glyph + if (glyphName.length === 5) { + code = parseInt(glyphName.substr(1), 16); + } + break; + case 'C': // Cddd glyph + case 'c': // cddd glyph + if (glyphName.length >= 3) { + code = +glyphName.substr(1); + } + break; } - // Cddd glyph - if (glyphName.length >= 3 && - glyphName[0] === 'C' && - (code = +glyphName.substr(1))) { + if (code) { + // If |baseEncodingName| is one of the predefined encodings, + // and |code| equals |charcode|, using the glyph defined in the + 
// baseEncoding seems to yield a better |toUnicode| mapping + // (fixes issue 5070). + if (baseEncodingName && code === +charcode) { + var baseEncoding = Encodings[baseEncodingName]; + if (baseEncoding && (glyphName = baseEncoding[charcode])) { + toUnicode[charcode] = + String.fromCharCode(GlyphsUnicode[glyphName]); + continue; + } + } toUnicode[charcode] = String.fromCharCode(code); } continue; diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 77da22580393d..f46810239f8cd 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -56,6 +56,7 @@ !issue925.pdf !issue4668.pdf !issue5039.pdf +!issue5070.pdf !gradientfill.pdf !bug903856.pdf !bug850854.pdf diff --git a/test/pdfs/issue5070.pdf b/test/pdfs/issue5070.pdf new file mode 100644 index 0000000000000..fe77923ca42e5 Binary files /dev/null and b/test/pdfs/issue5070.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index b3af44cf69439..15f1ea8cc32e4 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -1067,6 +1067,13 @@ "rounds": 1, "type": "eq" }, + { "id": "issue5070", + "file": "pdfs/issue5070.pdf", + "md5": "ec2ca0b4954c8390a5b3b0ffd79a8e92", + "link": false, + "rounds": 1, + "type": "eq" + }, { "id": "issue1257", "file": "pdfs/issue1257.pdf", "md5": "9111533826bc21ed774e8e01603a2f54",