mirror of
https://github.com/ianstormtaylor/slate.git
synced 2025-08-10 09:13:59 +02:00
Fix emoji offsets (#3310)
* Fix emoji offsets * Fix linting * Remove the unused `forward` argument * Fix linting * Code review * Fix linting * Add newline to end of file
This commit is contained in:
committed by
Ian Storm Taylor
parent
1f367a25fa
commit
63a94a23c7
@@ -792,6 +792,7 @@ const PUNCTUATION = /[\u0021-\u0023\u0025-\u002A\u002C-\u002F\u003A\u003B\u003F\
|
||||
const CHAMELEON = /['\u2018\u2019]/
|
||||
const SURROGATE_START = 0xd800
|
||||
const SURROGATE_END = 0xdfff
|
||||
const ZERO_WIDTH_JOINER = 0x200d
|
||||
|
||||
/**
|
||||
* Check if a character is a word character. The `remaining` argument is used
|
||||
@@ -828,9 +829,76 @@ const isWordCharacter = (char: string, remaining: string): boolean => {
|
||||
*/
|
||||
|
||||
const getCharacterDistance = (text: string): number => {
|
||||
const code = text.charCodeAt(0)
|
||||
const isSurrogate = SURROGATE_START <= code && code <= SURROGATE_END
|
||||
return isSurrogate ? 2 : 1
|
||||
let offset = 0
|
||||
// prev types:
|
||||
// SURR: surrogate pair
|
||||
// MOD: modifier (technically also surrogate pair)
|
||||
// ZWJ: zero width joiner
|
||||
// VAR: variation selector
|
||||
// BMP: sequenceable character from basic multilingual plane
|
||||
let prev: 'SURR' | 'MOD' | 'ZWJ' | 'VAR' | 'BMP' | null = null
|
||||
let charCode = text.charCodeAt(0)
|
||||
|
||||
while (charCode) {
|
||||
if (isSurrogate(charCode)) {
|
||||
const modifier = isModifier(charCode, text, offset)
|
||||
|
||||
// Early returns are the heart of this function, where we decide if previous and current
|
||||
// codepoints should form a single character (in terms of how many of them should selection
|
||||
// jump over).
|
||||
if (prev === 'SURR' || prev === 'BMP') {
|
||||
break
|
||||
}
|
||||
|
||||
offset += 2
|
||||
prev = modifier ? 'MOD' : 'SURR'
|
||||
charCode = text.charCodeAt(offset)
|
||||
// Absolutely fine to `continue` without any checks because if `charCode` is NaN (which
|
||||
// is the case when out of `text` range), next `while` loop won"t execute and we"re done.
|
||||
continue
|
||||
}
|
||||
|
||||
if (charCode === ZERO_WIDTH_JOINER) {
|
||||
offset += 1
|
||||
prev = 'ZWJ'
|
||||
charCode = text.charCodeAt(offset)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if (isBMPEmoji(charCode)) {
|
||||
if (prev && prev !== 'ZWJ' && prev !== 'VAR') {
|
||||
break
|
||||
}
|
||||
offset += 1
|
||||
prev = 'BMP'
|
||||
charCode = text.charCodeAt(offset)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if (isVariationSelector(charCode)) {
|
||||
if (prev && prev !== 'ZWJ') {
|
||||
break
|
||||
}
|
||||
offset += 1
|
||||
prev = 'VAR'
|
||||
charCode = text.charCodeAt(offset)
|
||||
continue
|
||||
}
|
||||
|
||||
// Modifier 'groups up' with what ever character is before that (even whitespace), need to
|
||||
// look ahead.
|
||||
if (prev === 'MOD') {
|
||||
offset += 1
|
||||
break
|
||||
}
|
||||
|
||||
// If while loop ever gets here, we're done (e.g latin chars).
|
||||
break
|
||||
}
|
||||
|
||||
return offset || 1
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -863,6 +931,59 @@ const getWordDistance = (text: string): number => {
|
||||
return length
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if `code` is a surrogate
|
||||
*/
|
||||
|
||||
const isSurrogate = (code: number): boolean =>
|
||||
SURROGATE_START <= code && code <= SURROGATE_END
|
||||
|
||||
/**
|
||||
* Does `code` form Modifier with next one.
|
||||
*
|
||||
* https://emojipedia.org/modifiers/
|
||||
*/
|
||||
|
||||
const isModifier = (code: number, text: string, offset: number): boolean => {
|
||||
if (code === 0xd83c) {
|
||||
const next = text.charCodeAt(offset + 1)
|
||||
return next <= 0xdfff && next >= 0xdffb
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
/**
|
||||
* Is `code` a Variation Selector.
|
||||
*
|
||||
* https://codepoints.net/variation_selectors
|
||||
*/
|
||||
|
||||
const isVariationSelector = (code: number): boolean => {
|
||||
return code <= 0xfe0f && code >= 0xfe00
|
||||
}
|
||||
|
||||
/**
|
||||
* Is `code` one of the BMP codes used in emoji sequences.
|
||||
*
|
||||
* https://emojipedia.org/emoji-zwj-sequences/
|
||||
*/
|
||||
|
||||
const isBMPEmoji = (code: number): boolean => {
|
||||
// This requires tiny bit of maintanance, better ideas?
|
||||
// Fortunately it only happens if new Unicode Standard
|
||||
// is released. Fails gracefully if upkeep lags behind,
|
||||
// same way Slate previously behaved with all emojis.
|
||||
return (
|
||||
code === 0x2764 || // heart (❤)
|
||||
code === 0x2642 || // male (♂)
|
||||
code === 0x2640 || // female (♀)
|
||||
code === 0x2620 || // scull (☠)
|
||||
code === 0x2695 || // medical (⚕)
|
||||
code === 0x2708 || // plane (✈️)
|
||||
code === 0x25ef // large circle (◯)
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* A helper type for narrowing matched nodes with a predicate.
|
||||
*/
|
||||
|
Reference in New Issue
Block a user