Fix unicode offset (#3044)

* Merges pr-2680 * Handle end/beginning of node/text: we have to return 1 in order not to break jumping to next/previous node * Update emojis example to insert emojis as plain text with shift+click. * Fix eslint issues
2025-08-30 02:19:52 +02:00 · 2019-10-06 15:07:53 +02:00
parent d71ea08fdf
commit 8dce7538eb
4 changed files with 222 additions and 19 deletions
--- a/examples/emojis/index.js
+++ b/examples/emojis/index.js
@@ -37,7 +37,8 @@ const EMOJIS = [
  '👻',
  '🍔',
  '🍑',
-  '🔑',
+  '👩‍❤️‍👩',
  '👨‍👩‍👦',
 ]
 /**
@@ -158,11 +159,16 @@ class Emojis extends React.Component {
  onClickEmoji = (e, code) => {
    e.preventDefault()
    const { editor } = this
-    this.editor
+    if (e.shiftKey) {
-      .insertInline({ type: 'emoji', data: { code } })
+      editor.insertText(code)
-      .moveToStartOfNextText()
+    } else {
-      .focus()
+      editor.insertInline({ type: 'emoji', data: { code } })
      editor.moveToStartOfNextText()
    }
    editor.focus()
  }
 }
--- a/examples/emojis/value.json
+++ b/examples/emojis/value.json
@@ -43,6 +43,17 @@
            "text": "This example shows emojis in action."
          }
        ]
      },
      {
        "object": "block",
        "type": "paragraph",
        "nodes": [
          {
            "object": "text",
            "text":
              "Click on an emoji at the top to insert it as an inline node, shift-click to insert it as plain text."
          }
        ]
      }
    ]
  }
--- a/packages/slate/src/commands/on-selection.js
+++ b/packages/slate/src/commands/on-selection.js
@@ -131,8 +131,18 @@ Commands.moveAnchorToStartOfText = editor => {
  editor.command(pointEdgeObject, 'anchor', 'start', 'text')
 }
-Commands.moveBackward = (editor, ...args) => {
+Commands.moveBackward = (editor, chars = 1) => {
-  editor.moveAnchorBackward(...args).moveFocusBackward(...args)
+  if (chars === 0) return
  const { value } = editor
  const { document, selection } = value
  const { start } = selection
  const startBlock = document.getClosestBlock(start.key)
  const o = startBlock.getOffset(start.key)
  const offset = o + start.offset
  const { text } = startBlock
  const charsOffset = TextUtils.getCharOffsetBackward(text, offset, chars)
  editor.moveAnchorBackward(charsOffset).moveFocusBackward(charsOffset)
 }
 Commands.moveWordBackward = (editor, ...args) => {
@@ -355,8 +365,18 @@ Commands.moveFocusToStartOfText = editor => {
  editor.command(pointEdgeObject, 'focus', 'start', 'text')
 }
-Commands.moveForward = (editor, ...args) => {
+Commands.moveForward = (editor, chars = 1) => {
-  editor.moveAnchorForward(...args).moveFocusForward(...args)
+  if (chars === 0) return
  const { value } = editor
  const { document, selection } = value
  const { start } = selection
  const startBlock = document.getClosestBlock(start.path)
  const o = startBlock.getOffset(start.key)
  const offset = o + start.offset
  const { text } = startBlock
  const charsOffset = TextUtils.getCharOffsetForward(text, offset, chars)
  editor.moveAnchorForward(charsOffset).moveFocusForward(charsOffset)
 }
 Commands.moveWordForward = (editor, ...args) => {
--- a/packages/slate/src/utils/text-utils.js
+++ b/packages/slate/src/utils/text-utils.js
@@ -45,6 +45,63 @@ function isSurrogate(code) {
  return SURROGATE_START <= code && code <= SURROGATE_END
 }
 /**
 * Check whether `code` is the high surrogate of an emoji modifier,
 * i.e. whether it is followed in `text` by a matching low surrogate.
 *
 * https://emojipedia.org/modifiers/
 *
 * @param {Number} code UTF-16 code unit found at `offset`
 * @param {String} text string that is inspected for the following code unit
 * @param {Number} offset index of `code` inside `text`
 * @return {Boolean}
 */
 function isModifier(code, text, offset) {
  // Only 0xd83c can start a modifier; anything else is rejected right away.
  if (code !== 0xd83c) return false
  // Out of range reads give NaN, which fails both comparisons below.
  const low = text.charCodeAt(offset + 1)
  return low >= 0xdffb && low <= 0xdfff
 }
 /**
 * Check whether `code` lies in the Variation Selector range
 * (0xfe00 to 0xfe0f, both ends included).
 *
 * https://codepoints.net/variation_selectors
 *
 * @param {Number} code
 * @return {Boolean}
 */
 function isVariationSelector(code) {
  const first = 0xfe00
  const last = 0xfe0f
  return first <= code && code <= last
 }
 /**
 * Check whether `code` is one of the Basic Multilingual Plane
 * codes that take part in emoji sequences.
 *
 * https://emojipedia.org/emoji-zwj-sequences/
 *
 * @param {Number} code
 * @return {Boolean}
 */
 function isBMPEmoji(code) {
  // This list requires a tiny bit of maintenance (better ideas?), but
  // only when a new Unicode Standard is released. It fails gracefully
  // if upkeep lags behind, the same way Slate previously behaved with
  // all emojis.
  switch (code) {
    case 0x2764: // heart (❤)
    case 0x2642: // male (♂)
    case 0x2640: // female (♀)
    case 0x2620: // skull (☠)
    case 0x2695: // medical (⚕)
    case 0x2708: // plane (✈️)
    case 0x25ef: // large circle (◯)
      return true
    default:
      return false
  }
 }
 /**
 * Is a character a word character? Needs the `remaining` characters too.
 *
@@ -81,42 +138,151 @@ function getCharLength(char) {
 }
 /**
- * Get the offset to the end of the first character in `text`.
+ * Get the offset to the end of the character(s) in `text`.
 * This function is emoji aware and handles them correctly.
 *
 * @param {String} text
 * @param {Number} chars
 * @param {Boolean} forward
 * @return {Number}
 */
-function getCharOffset(text) {
+function getCharOffset(text, chars, forward) {
-  const char = text.charAt(0)
+  let offset = 0
-  return getCharLength(char)
+
  // Handle end/beginning of node: we have to return 1 in order not to
  // break the cursor's jumping to the next/previous node. We need to return
  // early because otherwise ''.charCodeAt(0) returns NaN, the default
  // handling of 'latin characters' at the end of the while loop would
  // never be reached, and we would return '0' as the offset.
  if (text === '') return 1
  // Calculate offset sum of each character
  for (let i = 0; i < chars; i++) {
    // `prev` types (better ideas?):
    // - SURR: surrogate pair
    // - MOD: modifier (technically also surrogate pair)
    // - ZWJ: zero width joiner
    // - VAR: variation selector
    // - BMP: sequenceable character from Basic Multilingual Plane
    let prev = null
    let charCode = text.charCodeAt(offset)
    while (charCode) {
      if (isSurrogate(charCode)) {
        const modifier = isModifier(charCode, text, offset)
        // Early returns are the heart of this loop where
        // we decide if previous and current codepoints
        // should form a single character (in other words:
        // how many of them should selection jump over).
        if (forward) {
          if (
            (!modifier && prev && prev !== 'ZWJ') ||
            (modifier && prev && prev !== 'SURR')
          ) {
            break
          }
        } else if (prev === 'SURR' || prev === 'BMP') {
          break
        }
        offset += 2
        prev = modifier ? 'MOD' : 'SURR'
        charCode = text.charCodeAt(offset)
        // It's okay to `continue` without checking
        // because if `charCode` is NaN (which is
        // the case when out of `text` range), next
        // `while` loop won't execute and we're done.
        continue
      }
      // If zero width joiner
      if (charCode === 0x200d) {
        offset += 1
        prev = 'ZWJ'
        charCode = text.charCodeAt(offset)
        continue
      }
      if (isBMPEmoji(charCode)) {
        if (
          (forward && prev === 'VAR') ||
          (prev && prev !== 'ZWJ' && prev !== 'VAR')
        ) {
          break
        }
        offset += 1
        prev = 'BMP'
        charCode = text.charCodeAt(offset)
        continue
      }
      if (isVariationSelector(charCode)) {
        if (!forward && prev && prev !== 'ZWJ') {
          break
        }
        offset += 1
        prev = 'VAR'
        charCode = text.charCodeAt(offset)
        continue
      }
      // A modifier "fuses" with whatever character comes before it
      // (even whitespace), so we need to look ahead if the loop gets here.
      if (forward) {
        const nextCharCode = text.charCodeAt(offset + 1)
        if (isModifier(nextCharCode, text, offset + 1)) {
          offset += 3
          prev = 'MOD'
          charCode = text.charCodeAt(offset)
          continue
        }
      } else if (prev === 'MOD') {
        offset += 1
        break
      }
      // If while loop ever gets here, we're
      // done (e.g Latin characters, length 1).
      if (prev === null) offset += 1
      break
    }
  }
  return offset
 }
 /**
- * Get the offset to the end of the character before an `offset` in `text`.
+ * Get the offset to the end of character(s) before an `offset` in `text`.
 *
 * @param {String} text
 * @param {Number} offset
 * @param {Number} chars
 * @return {Number}
 */
-function getCharOffsetBackward(text, offset) {
+function getCharOffsetBackward(text, offset, chars = 1) {
  text = text.slice(0, offset)
  text = reverse(text)
-  return getCharOffset(text)
+  return getCharOffset(text, chars)
 }
 /**
- * Get the offset to the end of the character after an `offset` in `text`.
+ * Get the offset to the end of character(s) after an `offset` in `text`.
 *
 * @param {String} text
 * @param {Number} offset
 * @param {Number} chars
 * @return {Number}
 */
-function getCharOffsetForward(text, offset) {
+function getCharOffsetForward(text, offset, chars = 1) {
  text = text.slice(offset)
-  return getCharOffset(text)
+  return getCharOffset(text, chars, true)
 }
 /**