// Mirror of https://github.com/ianstormtaylor/slate.git
// (synced 2025-02-23 16:55:23 +01:00; 214 lines; 4.9 KiB; JavaScript)
import { reverse } from 'esrever'
/**
 * Inclusive lower and upper bounds of the surrogate code unit range.
 *
 * @type {Number}
 */

const SURROGATE_START = 0xD800
const SURROGATE_END = 0xDFFF

/**
 * Regular expression that matches a single whitespace character.
 *
 * @type {RegExp}
 */

const SPACE = /\s/

/**
 * Regular expression for "chameleon" characters: the straight apostrophe and
 * the left/right single quotation marks. They are treated as word characters
 * only while they sit inside of a word.
 *
 * @type {RegExp}
 */

const CHAMELEON = /['\u2018\u2019]/

/**
 * Regular expression that matches a single punctuation character. The class
 * is an explicit list of code units and ranges, all within the Basic
 * Multilingual Plane.
 *
 * @type {RegExp}
 */

const PUNCTUATION = /[\u0021-\u0023\u0025-\u002A\u002C-\u002F\u003A\u003B\u003F\u0040\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E3B\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/

/**
 * Check whether a character `code` falls inside the surrogate range, bounds
 * included.
 *
 * @param {Number} code
 * @return {Boolean}
 */

function isSurrogate(code) {
  return code >= SURROGATE_START && code <= SURROGATE_END
}

/**
 * Is a character a word character? Needs the `remaining` characters too,
 * because a chameleon character (an apostrophe-like quote) only counts as a
 * word character when the character that follows it is a word character.
 *
 * An empty `char` is never a word character, so a chameleon at the very end
 * of the text is classified as punctuation.
 *
 * @param {String} char
 * @param {String || Void} remaining
 * @return {Boolean}
 */

function isWord(char, remaining = '') {
  // Nothing at all, or whitespace, is never part of a word.
  if (!char || SPACE.test(char)) return false

  // If it's a chameleon character, recurse to see if the next one is or not.
  if (CHAMELEON.test(char)) {
    // Take the whole next character, i.e. both halves of a surrogate pair.
    const length = getCharLength(remaining.charAt(0))
    const next = remaining.slice(0, length)
    const rest = remaining.slice(length)
    if (isWord(next, rest)) return true
  }

  return !PUNCTUATION.test(char)
}

/**
 * Get the length of a `character`: 2 when its first code unit is a
 * surrogate, otherwise 1.
 *
 * @param {String} char
 * @return {Number}
 */

function getCharLength(char) {
  const code = char.charCodeAt(0)
  if (isSurrogate(code)) return 2
  return 1
}

/**
 * Get the offset to the end of the first character in `text`, i.e. the
 * length of whatever character starts at index 0.
 *
 * @param {String} text
 * @return {Number}
 */

function getCharOffset(text) {
  return getCharLength(text.charAt(0))
}

/**
 * Get the offset to the end of the character before an `offset` in `text`.
 * The text before `offset` is reversed, then measured from its start.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */

function getCharOffsetBackward(text, offset) {
  const before = text.slice(0, offset)
  const reversed = reverse(before)
  return getCharOffset(reversed)
}

/**
 * Get the offset to the end of the character after an `offset` in `text`.
 * Only the text from `offset` onwards is measured.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */

function getCharOffsetForward(text, offset) {
  const after = text.slice(offset)
  return getCharOffset(after)
}

/**
 * Get the length of a `string` in characters, where a surrogate pair counts
 * as a single character.
 *
 * The string is split by its built-in iterator, which walks code points
 * rather than UTF-16 code units. A lone (unpaired) surrogate counts as one
 * character.
 *
 * @param {String} string
 * @return {Number}
 */

function getLength(string) {
  return Array.from(string).length
}

/**
 * Get the offset to the end of the first word in `text`.
 *
 * Leading non-word characters (whitespace, punctuation) are skipped first,
 * then the scan runs until the first non-word character after the word.
 * The result is a string offset in UTF-16 code units, so a surrogate pair
 * contributes 2, matching what the character offset helpers return.
 *
 * @param {String} text
 * @return {Number}
 */

function getWordOffset(text) {
  let offset = 0
  let started = false

  while (offset < text.length) {
    // Take the whole character, i.e. both halves of a surrogate pair.
    const size = getCharLength(text.charAt(offset))
    const char = text.slice(offset, offset + size)
    const rest = text.slice(offset + char.length)

    if (isWord(char, rest)) {
      started = true
    } else if (started) {
      // First non-word character after the word: the word ends here.
      break
    }

    // Advance by what was actually consumed, so a trailing lone surrogate
    // can never push the offset past the end of the text.
    offset += char.length
  }

  return offset
}

/**
 * Get the offset to the end of the word before an `offset` in `text`.
 * The text before `offset` is reversed, then scanned as if going forward.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */

function getWordOffsetBackward(text, offset) {
  const before = text.slice(0, offset)
  const reversed = reverse(before)
  return getWordOffset(reversed)
}

/**
 * Get the offset to the end of the word after an `offset` in `text`.
 * Only the text from `offset` onwards is scanned.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */

function getWordOffsetForward(text, offset) {
  const after = text.slice(offset)
  return getWordOffset(after)
}

/**
 * Export the character and word offset helpers as the module's default
 * object.
 */

export default {
  getCharOffsetForward,
  getCharOffsetBackward,
  getWordOffsetBackward,
  getWordOffsetForward,
}