Skip to content

Commit

Permalink
Merge pull request #5095 from Snuffleupagus/issue-5070
Browse files Browse the repository at this point in the history
Adjust the heuristics to recognize more cases of unknown glyphs for |toUnicode| (issue 5070)
  • Loading branch information
yurydelendik committed Aug 5, 2014
2 parents 2b87ff9 + 8ecbb4d commit fa53fcb
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 17 deletions.
52 changes: 35 additions & 17 deletions src/core/fonts.js
Original file line number Diff line number Diff line change
Expand Up @@ -4299,6 +4299,7 @@ var Font = (function FontClosure() {
if (!properties.composite /* is simple font */) {
toUnicode = [];
var encoding = properties.defaultEncoding.slice();
var baseEncodingName = properties.baseEncodingName;
// Merge in the differences array.
var differences = properties.differences;
for (charcode in differences) {
Expand All @@ -4309,26 +4310,43 @@ var Font = (function FontClosure() {
var glyphName = encoding[charcode];
// b) Look up the character name in the Adobe Glyph List (see the
// Bibliography) to obtain the corresponding Unicode value.
if (glyphName === '' || !(glyphName in GlyphsUnicode)) {
if (glyphName === '') {
continue;
} else if (GlyphsUnicode[glyphName] === undefined) {
// (undocumented) c) A few heuristics to recognize unknown glyphs
// NOTE: Adobe Reader does not do this step, but OSX Preview does
var code;
// Gxx glyph
if (glyphName.length === 3 &&
glyphName[0] === 'G' &&
(code = parseInt(glyphName.substr(1), 16))) {
toUnicode[charcode] = String.fromCharCode(code);
}
// g00xx glyph
if (glyphName.length === 5 &&
glyphName[0] === 'g' &&
(code = parseInt(glyphName.substr(1), 16))) {
toUnicode[charcode] = String.fromCharCode(code);
var code = 0;
switch (glyphName[0]) {
case 'G': // Gxx glyph
if (glyphName.length === 3) {
code = parseInt(glyphName.substr(1), 16);
}
break;
case 'g': // g00xx glyph
if (glyphName.length === 5) {
code = parseInt(glyphName.substr(1), 16);
}
break;
case 'C': // Cddd glyph
case 'c': // cddd glyph
if (glyphName.length >= 3) {
code = +glyphName.substr(1);
}
break;
}
// Cddd glyph
if (glyphName.length >= 3 &&
glyphName[0] === 'C' &&
(code = +glyphName.substr(1))) {
if (code) {
// If |baseEncodingName| is one of the predefined encodings,
// and |code| equals |charcode|, using the glyph defined in the
// baseEncoding seems to yield a better |toUnicode| mapping
// (fixes issue 5070).
if (baseEncodingName && code === +charcode) {
var baseEncoding = Encodings[baseEncodingName];
if (baseEncoding && (glyphName = baseEncoding[charcode])) {
toUnicode[charcode] =
String.fromCharCode(GlyphsUnicode[glyphName]);
continue;
}
}
toUnicode[charcode] = String.fromCharCode(code);
}
continue;
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
!issue925.pdf
!issue4668.pdf
!issue5039.pdf
!issue5070.pdf
!gradientfill.pdf
!bug903856.pdf
!bug850854.pdf
Expand Down
Binary file added test/pdfs/issue5070.pdf
Binary file not shown.
7 changes: 7 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,13 @@
"rounds": 1,
"type": "eq"
},
{ "id": "issue5070",
"file": "pdfs/issue5070.pdf",
"md5": "ec2ca0b4954c8390a5b3b0ffd79a8e92",
"link": false,
"rounds": 1,
"type": "eq"
},
{ "id": "issue1257",
"file": "pdfs/issue1257.pdf",
"md5": "9111533826bc21ed774e8e01603a2f54",
Expand Down

0 comments on commit fa53fcb

Please sign in to comment.