1
0
mirror of https://github.com/ianstormtaylor/slate.git synced 2025-08-29 18:09:49 +02:00

Fix unicode offset (#3044)

* Merges pr-2680

* Handle end/beginning of node/text: we have to return 1 in order not to break jumping to next/previous node

* Update emojis example to insert emojis as plain text with shift+click.

* Fix eslint issues
This commit is contained in:
davidhoeller
2019-10-06 15:07:53 +02:00
committed by Ian Storm Taylor
parent d71ea08fdf
commit 8dce7538eb
4 changed files with 222 additions and 19 deletions

View File

@@ -37,7 +37,8 @@ const EMOJIS = [
'👻',
'🍔',
'🍑',
'🔑',
'👩‍❤️‍👩',
'👨‍👩‍👦',
]
/**
@@ -158,11 +159,16 @@ class Emojis extends React.Component {
onClickEmoji = (e, code) => {
e.preventDefault()
const { editor } = this
this.editor
.insertInline({ type: 'emoji', data: { code } })
.moveToStartOfNextText()
.focus()
if (e.shiftKey) {
editor.insertText(code)
} else {
editor.insertInline({ type: 'emoji', data: { code } })
editor.moveToStartOfNextText()
}
editor.focus()
}
}

View File

@@ -43,6 +43,17 @@
"text": "This example shows emojis in action."
}
]
},
{
"object": "block",
"type": "paragraph",
"nodes": [
{
"object": "text",
"text":
"Click on an emoji at the top to insert it as an inline node, shift-click to insert it as plain text."
}
]
}
]
}

View File

@@ -131,8 +131,18 @@ Commands.moveAnchorToStartOfText = editor => {
editor.command(pointEdgeObject, 'anchor', 'start', 'text')
}
Commands.moveBackward = (editor, ...args) => {
editor.moveAnchorBackward(...args).moveFocusBackward(...args)
Commands.moveBackward = (editor, chars = 1) => {
if (chars === 0) return
const { value } = editor
const { document, selection } = value
const { start } = selection
const startBlock = document.getClosestBlock(start.key)
const o = startBlock.getOffset(start.key)
const offset = o + start.offset
const { text } = startBlock
const charsOffset = TextUtils.getCharOffsetBackward(text, offset, chars)
editor.moveAnchorBackward(charsOffset).moveFocusBackward(charsOffset)
}
Commands.moveWordBackward = (editor, ...args) => {
@@ -355,8 +365,18 @@ Commands.moveFocusToStartOfText = editor => {
editor.command(pointEdgeObject, 'focus', 'start', 'text')
}
Commands.moveForward = (editor, ...args) => {
editor.moveAnchorForward(...args).moveFocusForward(...args)
Commands.moveForward = (editor, chars = 1) => {
if (chars === 0) return
const { value } = editor
const { document, selection } = value
const { start } = selection
const startBlock = document.getClosestBlock(start.path)
const o = startBlock.getOffset(start.key)
const offset = o + start.offset
const { text } = startBlock
const charsOffset = TextUtils.getCharOffsetForward(text, offset, chars)
editor.moveAnchorForward(charsOffset).moveFocusForward(charsOffset)
}
Commands.moveWordForward = (editor, ...args) => {

View File

@@ -45,6 +45,63 @@ function isSurrogate(code) {
return SURROGATE_START <= code && code <= SURROGATE_END
}
/**
* Does `code` form Modifier with next one.
*
* https://emojipedia.org/modifiers/
*
* @param {Number} code
* @param {String} text
* @param {Number} offset
* @return {Boolean}
*/
function isModifier(code, text, offset) {
if (code === 0xd83c) {
const next = text.charCodeAt(offset + 1)
return next <= 0xdfff && next >= 0xdffb
}
return false
}
/**
* Is `code` a Variation Selector.
*
* https://codepoints.net/variation_selectors
*
* @param {Number} code
* @return {Boolean}
*/
function isVariationSelector(code) {
return code <= 0xfe0f && code >= 0xfe00
}
/**
* Is `code` one of the BMP codes used in emoji sequences.
*
* https://emojipedia.org/emoji-zwj-sequences/
*
* @param {Number} code
* @return {Boolean}
*/
function isBMPEmoji(code) {
// This requires tiny bit of maintanance, better ideas?
// Fortunately it only happens if new Unicode Standard
// is released. Fails gracefully if upkeep lags behind,
// same way Slate previously behaved with all emojis.
return (
code === 0x2764 || // heart (❤)
code === 0x2642 || // male (♂)
code === 0x2640 || // female (♀)
code === 0x2620 || // scull (☠)
code === 0x2695 || // medical (⚕)
code === 0x2708 || // plane (✈️)
code === 0x25ef // large circle (◯)
)
}
/**
* Is a character a word character? Needs the `remaining` characters too.
*
@@ -81,42 +138,151 @@ function getCharLength(char) {
}
/**
* Get the offset to the end of the first character in `text`.
* Get the offset to the end of the character(s) in `text`.
* This function is emoji aware and handles them correctly.
*
* @param {String} text
* @param {Number} chars
* @param {Boolean} forward
* @return {Number}
*/
function getCharOffset(text) {
const char = text.charAt(0)
return getCharLength(char)
function getCharOffset(text, chars, forward) {
let offset = 0
// Handle end/beginning of node: we have to return 1 in order not to
// break cursor's jumping to next/previous node. We need to return early
// because otherwise, ''.charCodeAt(0) returned NaN and, the default
// handling 'latin characters' at the end of the while loop would
// would never be reached an we returned '0' as offset.
if (text === '') return 1
// Calculate offset sum of each character
for (let i = 0; i < chars; i++) {
// `prev` types (better ideas?):
// - SURR: surrogate pair
// - MOD: modifier (technically also surrogate pair)
// - ZWJ: zero width joiner
// - VAR: variation selector
// - BMP: sequenceable character from Basic Multilingual Plane
let prev = null
let charCode = text.charCodeAt(offset)
while (charCode) {
if (isSurrogate(charCode)) {
const modifier = isModifier(charCode, text, offset)
// Early returns are the heart of this loop where
// we decide if previous and current codepoints
// should form a single character (in other words:
// how many of them should selection jump over).
if (forward) {
if (
(!modifier && prev && prev !== 'ZWJ') ||
(modifier && prev && prev !== 'SURR')
) {
break
}
} else if (prev === 'SURR' || prev === 'BMP') {
break
}
offset += 2
prev = modifier ? 'MOD' : 'SURR'
charCode = text.charCodeAt(offset)
// It's okay to `continue` without checking
// because if `charCode` is NaN (which is
// the case when out of `text` range), next
// `while` loop won't execute and we're done.
continue
}
// If zero width joiner
if (charCode === 0x200d) {
offset += 1
prev = 'ZWJ'
charCode = text.charCodeAt(offset)
continue
}
if (isBMPEmoji(charCode)) {
if (
(forward && prev === 'VAR') ||
(prev && prev !== 'ZWJ' && prev !== 'VAR')
) {
break
}
offset += 1
prev = 'BMP'
charCode = text.charCodeAt(offset)
continue
}
if (isVariationSelector(charCode)) {
if (!forward && prev && prev !== 'ZWJ') {
break
}
offset += 1
prev = 'VAR'
charCode = text.charCodeAt(offset)
continue
}
// Modifier "fuses" with what ever character is before that
// (even whitespace), need to look ahead if loop gets here.
if (forward) {
const nextCharCode = text.charCodeAt(offset + 1)
if (isModifier(nextCharCode, text, offset + 1)) {
offset += 3
prev = 'MOD'
charCode = text.charCodeAt(offset)
continue
}
} else if (prev === 'MOD') {
offset += 1
break
}
// If while loop ever gets here, we're
// done (e.g Latin characters, length 1).
if (prev === null) offset += 1
break
}
}
return offset
}
/**
* Get the offset to the end of the character before an `offset` in `text`.
* Get the offset to the end of character(s) before an `offset` in `text`.
*
* @param {String} text
* @param {Number} offset
* @param {Number} chars
* @return {Number}
*/
function getCharOffsetBackward(text, offset) {
function getCharOffsetBackward(text, offset, chars = 1) {
text = text.slice(0, offset)
text = reverse(text)
return getCharOffset(text)
return getCharOffset(text, chars)
}
/**
* Get the offset to the end of the character after an `offset` in `text`.
* Get the offset to the end of character(s) after an `offset` in `text`.
*
* @param {String} text
* @param {Number} offset
* @param {Number} chars
* @return {Number}
*/
function getCharOffsetForward(text, offset) {
function getCharOffsetForward(text, offset, chars = 1) {
text = text.slice(offset)
return getCharOffset(text)
return getCharOffset(text, chars, true)
}
/**