From 8dce7538eb467f79c607b804e1b32adc3424216f Mon Sep 17 00:00:00 2001 From: davidhoeller <49078752+davidhoeller@users.noreply.github.com> Date: Sun, 6 Oct 2019 15:07:53 +0200 Subject: [PATCH] Fix unicode offset (#3044) * Merges pr-2680 * Handle end/beginning of node/text: we have to return 1 in order not to break jumping to next/previous node * Update emojis example to insert emojis as plain text with shift+click. * Fix eslint issues --- examples/emojis/index.js | 16 +- examples/emojis/value.json | 11 ++ packages/slate/src/commands/on-selection.js | 28 ++- packages/slate/src/utils/text-utils.js | 186 ++++++++++++++++++-- 4 files changed, 222 insertions(+), 19 deletions(-) diff --git a/examples/emojis/index.js b/examples/emojis/index.js index e5f6fac99..8f22a6ce6 100644 --- a/examples/emojis/index.js +++ b/examples/emojis/index.js @@ -37,7 +37,8 @@ const EMOJIS = [ '👻', '🍔', '🍑', - '🔑', + '👩‍❤️‍👩', + '👨‍👩‍👦', ] /** @@ -158,11 +159,16 @@ class Emojis extends React.Component { onClickEmoji = (e, code) => { e.preventDefault() + const { editor } = this - this.editor - .insertInline({ type: 'emoji', data: { code } }) - .moveToStartOfNextText() - .focus() + if (e.shiftKey) { + editor.insertText(code) + } else { + editor.insertInline({ type: 'emoji', data: { code } }) + editor.moveToStartOfNextText() + } + + editor.focus() } } diff --git a/examples/emojis/value.json b/examples/emojis/value.json index fffe28215..746bf7321 100644 --- a/examples/emojis/value.json +++ b/examples/emojis/value.json @@ -43,6 +43,17 @@ "text": "This example shows emojis in action." } ] + }, + { + "object": "block", + "type": "paragraph", + "nodes": [ + { + "object": "text", + "text": + "Click on an emoji at the top to insert it as an inline node, shift-click to insert it as plain text." 
+ } + ] } ] } diff --git a/packages/slate/src/commands/on-selection.js b/packages/slate/src/commands/on-selection.js index fd1bea760..7624faa4d 100644 --- a/packages/slate/src/commands/on-selection.js +++ b/packages/slate/src/commands/on-selection.js @@ -131,8 +131,18 @@ Commands.moveAnchorToStartOfText = editor => { editor.command(pointEdgeObject, 'anchor', 'start', 'text') } -Commands.moveBackward = (editor, ...args) => { - editor.moveAnchorBackward(...args).moveFocusBackward(...args) +Commands.moveBackward = (editor, chars = 1) => { + if (chars === 0) return + + const { value } = editor + const { document, selection } = value + const { start } = selection + const startBlock = document.getClosestBlock(start.key) + const o = startBlock.getOffset(start.key) + const offset = o + start.offset + const { text } = startBlock + const charsOffset = TextUtils.getCharOffsetBackward(text, offset, chars) + editor.moveAnchorBackward(charsOffset).moveFocusBackward(charsOffset) } Commands.moveWordBackward = (editor, ...args) => { @@ -355,8 +365,18 @@ Commands.moveFocusToStartOfText = editor => { editor.command(pointEdgeObject, 'focus', 'start', 'text') } -Commands.moveForward = (editor, ...args) => { - editor.moveAnchorForward(...args).moveFocusForward(...args) +Commands.moveForward = (editor, chars = 1) => { + if (chars === 0) return + + const { value } = editor + const { document, selection } = value + const { start } = selection + const startBlock = document.getClosestBlock(start.path) + const o = startBlock.getOffset(start.key) + const offset = o + start.offset + const { text } = startBlock + const charsOffset = TextUtils.getCharOffsetForward(text, offset, chars) + editor.moveAnchorForward(charsOffset).moveFocusForward(charsOffset) } Commands.moveWordForward = (editor, ...args) => { diff --git a/packages/slate/src/utils/text-utils.js b/packages/slate/src/utils/text-utils.js index 70083f11a..e9ae219d0 100644 --- a/packages/slate/src/utils/text-utils.js +++ 
b/packages/slate/src/utils/text-utils.js @@ -45,6 +45,63 @@ function isSurrogate(code) { return SURROGATE_START <= code && code <= SURROGATE_END } +/** + * Does `code` form a Modifier with the next one. + * + * https://emojipedia.org/modifiers/ + * + * @param {Number} code + * @param {String} text + * @param {Number} offset + * @return {Boolean} + */ + +function isModifier(code, text, offset) { + if (code === 0xd83c) { + const next = text.charCodeAt(offset + 1) + return next <= 0xdfff && next >= 0xdffb + } + return false +} + +/** + * Is `code` a Variation Selector. + * + * https://codepoints.net/variation_selectors + * + * @param {Number} code + * @return {Boolean} + */ + +function isVariationSelector(code) { + return code <= 0xfe0f && code >= 0xfe00 +} + +/** + * Is `code` one of the BMP codes used in emoji sequences. + * + * https://emojipedia.org/emoji-zwj-sequences/ + * + * @param {Number} code + * @return {Boolean} + */ + +function isBMPEmoji(code) { + // This requires a tiny bit of maintenance, better ideas? + // Fortunately it only happens if new Unicode Standard + // is released. Fails gracefully if upkeep lags behind, + // same way Slate previously behaved with all emojis. + return ( + code === 0x2764 || // heart (❤) + code === 0x2642 || // male (♂) + code === 0x2640 || // female (♀) + code === 0x2620 || // skull (☠) + code === 0x2695 || // medical (⚕) + code === 0x2708 || // plane (✈️) + code === 0x25ef // large circle (◯) + ) +} + /** * Is a character a word character? Needs the `remaining` characters too. * @@ -81,42 +138,151 @@ function getCharLength(char) { } /** - * Get the offset to the end of the first character in `text`. + * Get the offset to the end of the character(s) in `text`. + * This function is emoji aware and handles them correctly.
* * @param {String} text + * @param {Number} chars + * @param {Boolean} forward * @return {Number} */ -function getCharOffset(text) { - const char = text.charAt(0) - return getCharLength(char) +function getCharOffset(text, chars, forward) { + let offset = 0 + + // Handle end/beginning of node: we have to return 1 in order not to + // break cursor's jumping to next/previous node. We need to return early + // because otherwise ''.charCodeAt(0) returns NaN, so the default + // handling of 'latin characters' at the end of the while loop would + // never be reached and we would return '0' as offset. + if (text === '') return 1 + + // Calculate offset sum of each character + for (let i = 0; i < chars; i++) { + // `prev` types (better ideas?): + // - SURR: surrogate pair + // - MOD: modifier (technically also surrogate pair) + // - ZWJ: zero width joiner + // - VAR: variation selector + // - BMP: sequenceable character from Basic Multilingual Plane + let prev = null + let charCode = text.charCodeAt(offset) + + while (charCode) { + if (isSurrogate(charCode)) { + const modifier = isModifier(charCode, text, offset) + + // Early `break`s are the heart of this loop where + // we decide if previous and current codepoints + // should form a single character (in other words: + // how many of them should selection jump over). + if (forward) { + if ( + (!modifier && prev && prev !== 'ZWJ') || + (modifier && prev && prev !== 'SURR') + ) { + break + } + } else if (prev === 'SURR' || prev === 'BMP') { + break + } + + offset += 2 + prev = modifier ? 'MOD' : 'SURR' + charCode = text.charCodeAt(offset) + // It's okay to `continue` without checking + // because if `charCode` is NaN (which is + // the case when out of `text` range), next + // `while` loop won't execute and we're done.
+ continue + } + + // If zero width joiner + if (charCode === 0x200d) { + offset += 1 + prev = 'ZWJ' + charCode = text.charCodeAt(offset) + continue + } + + if (isBMPEmoji(charCode)) { + if ( + (forward && prev === 'VAR') || + (prev && prev !== 'ZWJ' && prev !== 'VAR') + ) { + break + } + + offset += 1 + prev = 'BMP' + charCode = text.charCodeAt(offset) + continue + } + + if (isVariationSelector(charCode)) { + if (!forward && prev && prev !== 'ZWJ') { + break + } + + offset += 1 + prev = 'VAR' + charCode = text.charCodeAt(offset) + continue + } + + // Modifier "fuses" with what ever character is before that + // (even whitespace), need to look ahead if loop gets here. + if (forward) { + const nextCharCode = text.charCodeAt(offset + 1) + + if (isModifier(nextCharCode, text, offset + 1)) { + offset += 3 + prev = 'MOD' + charCode = text.charCodeAt(offset) + continue + } + } else if (prev === 'MOD') { + offset += 1 + break + } + + // If while loop ever gets here, we're + // done (e.g Latin characters, length 1). + if (prev === null) offset += 1 + break + } + } + + return offset } /** - * Get the offset to the end of the character before an `offset` in `text`. + * Get the offset to the end of character(s) before an `offset` in `text`. * * @param {String} text * @param {Number} offset + * @param {Number} chars * @return {Number} */ -function getCharOffsetBackward(text, offset) { +function getCharOffsetBackward(text, offset, chars = 1) { text = text.slice(0, offset) text = reverse(text) - return getCharOffset(text) + return getCharOffset(text, chars) } /** - * Get the offset to the end of the character after an `offset` in `text`. + * Get the offset to the end of character(s) after an `offset` in `text`. 
* * @param {String} text * @param {Number} offset + * @param {Number} chars * @return {Number} */ -function getCharOffsetForward(text, offset) { +function getCharOffsetForward(text, offset, chars = 1) { text = text.slice(offset) - return getCharOffset(text) + return getCharOffset(text, chars, true) } /**