Skip to content

Commit

Permalink
Refine vertical punctuation logic based on Unicode standard (#3608)
Browse files Browse the repository at this point in the history
* Refined vertical punctuation logic

Based the Unicode character block table on the official Unicode 9.0 character database.

Refined the logic that distinguishes upright, rotated, and neutral characters, following Unicode Technical Report 50 (with some simplifications). In particular, not everything in the General Punctuation block is treated as having neutral orientation; instead, the vertical punctuation table is consulted.

* Removed commented-out supplementary plane entries

See mapbox/DEPRECATED-mapbox-gl#29 (comment) for an updated list.
  • Loading branch information
1ec5 authored Nov 14, 2016
1 parent b628786 commit f1d1bf4
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 46 deletions.
2 changes: 1 addition & 1 deletion js/symbol/shaping.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ function shapeText(text, glyphs, maxWidth, lineHeight, horizontalAlign, vertical

if (!glyph && codePoint !== newLine) continue;

if (!scriptDetection.charAllowsVerticalWritingMode(codePoint) || writingMode === WritingMode.horizontal) {
if (!scriptDetection.charHasUprightVerticalOrientation(codePoint) || writingMode === WritingMode.horizontal) {
positionedGlyphs.push(new PositionedGlyph(codePoint, x, yOffset, glyph, 0));
if (glyph) x += glyph.advance + spacing;

Expand Down
44 changes: 28 additions & 16 deletions js/util/is_char_in_unicode_block.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
'use strict';

// Adapted from https://github.com/jessetane/unicode-blocks/blob/master/Blocks.txt
// The following table comes from <http://www.unicode.org/Public/9.0.0/ucd/Blocks.txt>.
// Keep it synchronized with <http://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt>.

module.exports = {
// 'Basic Latin': (char) => char >= 0x0000 && char <= 0x007F,
// 'Latin-1 Supplement': (char) => char >= 0x0080 && char <= 0x00FF,
'Latin-1 Supplement': (char) => char >= 0x0080 && char <= 0x00FF,
// 'Latin Extended-A': (char) => char >= 0x0100 && char <= 0x017F,
// 'Latin Extended-B': (char) => char >= 0x0180 && char <= 0x024F,
// 'IPA Extensions': (char) => char >= 0x0250 && char <= 0x02AF,
Expand Down Expand Up @@ -42,7 +43,7 @@ module.exports = {
// 'Ethiopic': (char) => char >= 0x1200 && char <= 0x137F,
// 'Ethiopic Supplement': (char) => char >= 0x1380 && char <= 0x139F,
// 'Cherokee': (char) => char >= 0x13A0 && char <= 0x13FF,
// 'Unified Canadian Aboriginal Syllabics': (char) => char >= 0x1400 && char <= 0x167F,
'Unified Canadian Aboriginal Syllabics': (char) => char >= 0x1400 && char <= 0x167F,
// 'Ogham': (char) => char >= 0x1680 && char <= 0x169F,
// 'Runic': (char) => char >= 0x16A0 && char <= 0x16FF,
// 'Tagalog': (char) => char >= 0x1700 && char <= 0x171F,
Expand All @@ -51,7 +52,7 @@ module.exports = {
// 'Tagbanwa': (char) => char >= 0x1760 && char <= 0x177F,
// 'Khmer': (char) => char >= 0x1780 && char <= 0x17FF,
// 'Mongolian': (char) => char >= 0x1800 && char <= 0x18AF,
// 'Unified Canadian Aboriginal Syllabics Extended': (char) => char >= 0x18B0 && char <= 0x18FF,
'Unified Canadian Aboriginal Syllabics Extended': (char) => char >= 0x18B0 && char <= 0x18FF,
// 'Limbu': (char) => char >= 0x1900 && char <= 0x194F,
// 'Tai Le': (char) => char >= 0x1950 && char <= 0x197F,
// 'New Tai Lue': (char) => char >= 0x1980 && char <= 0x19DF,
Expand All @@ -64,6 +65,7 @@ module.exports = {
// 'Batak': (char) => char >= 0x1BC0 && char <= 0x1BFF,
// 'Lepcha': (char) => char >= 0x1C00 && char <= 0x1C4F,
// 'Ol Chiki': (char) => char >= 0x1C50 && char <= 0x1C7F,
// 'Cyrillic Extended-C': (char) => char >= 0x1C80 && char <= 0x1C8F,
// 'Sundanese Supplement': (char) => char >= 0x1CC0 && char <= 0x1CCF,
// 'Vedic Extensions': (char) => char >= 0x1CD0 && char <= 0x1CFF,
// 'Phonetic Extensions': (char) => char >= 0x1D00 && char <= 0x1D7F,
Expand All @@ -75,26 +77,26 @@ module.exports = {
// 'Superscripts and Subscripts': (char) => char >= 0x2070 && char <= 0x209F,
// 'Currency Symbols': (char) => char >= 0x20A0 && char <= 0x20CF,
// 'Combining Diacritical Marks for Symbols': (char) => char >= 0x20D0 && char <= 0x20FF,
// 'Letterlike Symbols': (char) => char >= 0x2100 && char <= 0x214F,
// 'Number Forms': (char) => char >= 0x2150 && char <= 0x218F,
'Letterlike Symbols': (char) => char >= 0x2100 && char <= 0x214F,
'Number Forms': (char) => char >= 0x2150 && char <= 0x218F,
// 'Arrows': (char) => char >= 0x2190 && char <= 0x21FF,
// 'Mathematical Operators': (char) => char >= 0x2200 && char <= 0x22FF,
// 'Miscellaneous Technical': (char) => char >= 0x2300 && char <= 0x23FF,
// 'Control Pictures': (char) => char >= 0x2400 && char <= 0x243F,
// 'Optical Character Recognition': (char) => char >= 0x2440 && char <= 0x245F,
// 'Enclosed Alphanumerics': (char) => char >= 0x2460 && char <= 0x24FF,
'Miscellaneous Technical': (char) => char >= 0x2300 && char <= 0x23FF,
'Control Pictures': (char) => char >= 0x2400 && char <= 0x243F,
'Optical Character Recognition': (char) => char >= 0x2440 && char <= 0x245F,
'Enclosed Alphanumerics': (char) => char >= 0x2460 && char <= 0x24FF,
// 'Box Drawing': (char) => char >= 0x2500 && char <= 0x257F,
// 'Block Elements': (char) => char >= 0x2580 && char <= 0x259F,
// 'Geometric Shapes': (char) => char >= 0x25A0 && char <= 0x25FF,
// 'Miscellaneous Symbols': (char) => char >= 0x2600 && char <= 0x26FF,
'Geometric Shapes': (char) => char >= 0x25A0 && char <= 0x25FF,
'Miscellaneous Symbols': (char) => char >= 0x2600 && char <= 0x26FF,
// 'Dingbats': (char) => char >= 0x2700 && char <= 0x27BF,
// 'Miscellaneous Mathematical Symbols-A': (char) => char >= 0x27C0 && char <= 0x27EF,
// 'Supplemental Arrows-A': (char) => char >= 0x27F0 && char <= 0x27FF,
// 'Braille Patterns': (char) => char >= 0x2800 && char <= 0x28FF,
// 'Supplemental Arrows-B': (char) => char >= 0x2900 && char <= 0x297F,
// 'Miscellaneous Mathematical Symbols-B': (char) => char >= 0x2980 && char <= 0x29FF,
// 'Supplemental Mathematical Operators': (char) => char >= 0x2A00 && char <= 0x2AFF,
// 'Miscellaneous Symbols and Arrows': (char) => char >= 0x2B00 && char <= 0x2BFF,
'Miscellaneous Symbols and Arrows': (char) => char >= 0x2B00 && char <= 0x2BFF,
// 'Glagolitic': (char) => char >= 0x2C00 && char <= 0x2C5F,
// 'Latin Extended-C': (char) => char >= 0x2C60 && char <= 0x2C7F,
// 'Coptic': (char) => char >= 0x2C80 && char <= 0x2CFF,
Expand All @@ -118,7 +120,7 @@ module.exports = {
'Enclosed CJK Letters and Months': (char) => char >= 0x3200 && char <= 0x32FF,
'CJK Compatibility': (char) => char >= 0x3300 && char <= 0x33FF,
'CJK Unified Ideographs Extension A': (char) => char >= 0x3400 && char <= 0x4DBF,
// 'Yijing Hexagram Symbols': (char) => char >= 0x4DC0 && char <= 0x4DFF,
'Yijing Hexagram Symbols': (char) => char >= 0x4DC0 && char <= 0x4DFF,
'CJK Unified Ideographs': (char) => char >= 0x4E00 && char <= 0x9FFF,
'Yi Syllables': (char) => char >= 0xA000 && char <= 0xA48F,
'Yi Radicals': (char) => char >= 0xA490 && char <= 0xA4CF,
Expand Down Expand Up @@ -151,15 +153,15 @@ module.exports = {
// 'High Surrogates': (char) => char >= 0xD800 && char <= 0xDB7F,
// 'High Private Use Surrogates': (char) => char >= 0xDB80 && char <= 0xDBFF,
// 'Low Surrogates': (char) => char >= 0xDC00 && char <= 0xDFFF,
// 'Private Use Area': (char) => char >= 0xE000 && char <= 0xF8FF,
'Private Use Area': (char) => char >= 0xE000 && char <= 0xF8FF,
'CJK Compatibility Ideographs': (char) => char >= 0xF900 && char <= 0xFAFF,
// 'Alphabetic Presentation Forms': (char) => char >= 0xFB00 && char <= 0xFB4F,
// 'Arabic Presentation Forms-A': (char) => char >= 0xFB50 && char <= 0xFDFF,
// 'Variation Selectors': (char) => char >= 0xFE00 && char <= 0xFE0F,
'Vertical Forms': (char) => char >= 0xFE10 && char <= 0xFE1F,
// 'Combining Half Marks': (char) => char >= 0xFE20 && char <= 0xFE2F,
'CJK Compatibility Forms': (char) => char >= 0xFE30 && char <= 0xFE4F,
// 'Small Form Variants': (char) => char >= 0xFE50 && char <= 0xFE6F,
'Small Form Variants': (char) => char >= 0xFE50 && char <= 0xFE6F,
// 'Arabic Presentation Forms-B': (char) => char >= 0xFE70 && char <= 0xFEFF,
'Halfwidth and Fullwidth Forms': (char) => char >= 0xFF00 && char <= 0xFFEF
// 'Specials': (char) => char >= 0xFFF0 && char <= 0xFFFF,
Expand All @@ -180,6 +182,7 @@ module.exports = {
// 'Deseret': (char) => char >= 0x10400 && char <= 0x1044F,
// 'Shavian': (char) => char >= 0x10450 && char <= 0x1047F,
// 'Osmanya': (char) => char >= 0x10480 && char <= 0x104AF,
// 'Osage': (char) => char >= 0x104B0 && char <= 0x104FF,
// 'Elbasan': (char) => char >= 0x10500 && char <= 0x1052F,
// 'Caucasian Albanian': (char) => char >= 0x10530 && char <= 0x1056F,
// 'Linear A': (char) => char >= 0x10600 && char <= 0x1077F,
Expand Down Expand Up @@ -214,13 +217,17 @@ module.exports = {
// 'Multani': (char) => char >= 0x11280 && char <= 0x112AF,
// 'Khudawadi': (char) => char >= 0x112B0 && char <= 0x112FF,
// 'Grantha': (char) => char >= 0x11300 && char <= 0x1137F,
// 'Newa': (char) => char >= 0x11400 && char <= 0x1147F,
// 'Tirhuta': (char) => char >= 0x11480 && char <= 0x114DF,
// 'Siddham': (char) => char >= 0x11580 && char <= 0x115FF,
// 'Modi': (char) => char >= 0x11600 && char <= 0x1165F,
// 'Mongolian Supplement': (char) => char >= 0x11660 && char <= 0x1167F,
// 'Takri': (char) => char >= 0x11680 && char <= 0x116CF,
// 'Ahom': (char) => char >= 0x11700 && char <= 0x1173F,
// 'Warang Citi': (char) => char >= 0x118A0 && char <= 0x118FF,
// 'Pau Cin Hau': (char) => char >= 0x11AC0 && char <= 0x11AFF,
// 'Bhaiksuki': (char) => char >= 0x11C00 && char <= 0x11C6F,
// 'Marchen': (char) => char >= 0x11C70 && char <= 0x11CBF,
// 'Cuneiform': (char) => char >= 0x12000 && char <= 0x123FF,
// 'Cuneiform Numbers and Punctuation': (char) => char >= 0x12400 && char <= 0x1247F,
// 'Early Dynastic Cuneiform': (char) => char >= 0x12480 && char <= 0x1254F,
Expand All @@ -231,6 +238,9 @@ module.exports = {
// 'Bassa Vah': (char) => char >= 0x16AD0 && char <= 0x16AFF,
// 'Pahawh Hmong': (char) => char >= 0x16B00 && char <= 0x16B8F,
// 'Miao': (char) => char >= 0x16F00 && char <= 0x16F9F,
// 'Ideographic Symbols and Punctuation': (char) => char >= 0x16FE0 && char <= 0x16FFF,
// 'Tangut': (char) => char >= 0x17000 && char <= 0x187FF,
// 'Tangut Components': (char) => char >= 0x18800 && char <= 0x18AFF,
// 'Kana Supplement': (char) => char >= 0x1B000 && char <= 0x1B0FF,
// 'Duployan': (char) => char >= 0x1BC00 && char <= 0x1BC9F,
// 'Shorthand Format Controls': (char) => char >= 0x1BCA0 && char <= 0x1BCAF,
Expand All @@ -241,7 +251,9 @@ module.exports = {
// 'Counting Rod Numerals': (char) => char >= 0x1D360 && char <= 0x1D37F,
// 'Mathematical Alphanumeric Symbols': (char) => char >= 0x1D400 && char <= 0x1D7FF,
// 'Sutton SignWriting': (char) => char >= 0x1D800 && char <= 0x1DAAF,
// 'Glagolitic Supplement': (char) => char >= 0x1E000 && char <= 0x1E02F,
// 'Mende Kikakui': (char) => char >= 0x1E800 && char <= 0x1E8DF,
// 'Adlam': (char) => char >= 0x1E900 && char <= 0x1E95F,
// 'Arabic Mathematical Alphabetic Symbols': (char) => char >= 0x1EE00 && char <= 0x1EEFF,
// 'Mahjong Tiles': (char) => char >= 0x1F000 && char <= 0x1F02F,
// 'Domino Tiles': (char) => char >= 0x1F030 && char <= 0x1F09F,
Expand Down
Loading

0 comments on commit f1d1bf4

Please sign in to comment.