1
0
mirror of https://github.com/ianstormtaylor/slate.git synced 2025-02-23 16:55:23 +01:00
slate/lib/utils/string.js

214 lines
4.9 KiB
JavaScript

import { reverse } from 'esrever'
/**
 * Inclusive lower and upper bounds of the UTF-16 surrogate code unit range.
 * `isSurrogate` treats any code unit between these two values as one half of
 * a surrogate pair.
 *
 * @type {Number}
 */
const SURROGATE_START = 0xD800
const SURROGATE_END = 0xDFFF
/**
 * A regex that matches a single whitespace character (the `\s` class).
 *
 * @type {RegExp}
 */
const SPACE = /\s/
/**
 * A regex to match chameleon characters: the ASCII apostrophe and the left and
 * right single quotation marks (U+2018, U+2019). `isWord` counts them as word
 * characters as long as the character that follows them is a word character.
 *
 * @type {RegExp}
 */
const CHAMELEON = /['\u2018\u2019]/
/**
 * A regex that matches a single punctuation character. It is written as an
 * explicit list of code points and ranges, from U+0021 up to U+FF65.
 *
 * @type {RegExp}
 */
const PUNCTUATION = /[\u0021-\u0023\u0025-\u002A\u002C-\u002F\u003A\u003B\u003F\u0040\u005B-\u005D\u005F\u007B\u007D\u00A1\u00A7\u00AB\u00B6\u00B7\u00BB\u00BF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E3B\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]/
/**
 * Check whether a character `code` falls inside the surrogate range, bounds
 * included.
 *
 * @param {Number} code
 * @return {Boolean}
 */
function isSurrogate(code) {
  const reachesStart = code >= SURROGATE_START
  const withinEnd = code <= SURROGATE_END
  return reachesStart && withinEnd
}
/**
 * Is a character a word character? Whitespace and punctuation are not; every
 * other character is. A chameleon character (an apostrophe or single quote)
 * counts as a word character only when the character that follows it in
 * `remaining` is itself a word character.
 *
 * Note that an empty "next" character is neither whitespace nor punctuation,
 * so a chameleon character at the very end of the text counts as a word
 * character too.
 *
 * @param {String} char
 * @param {String || Void} remaining The text that follows `char`; treated as
 *   an empty string when omitted.
 * @return {Boolean}
 */
function isWord(char, remaining = '') {
  if (SPACE.test(char)) return false

  // If it's a chameleon character, recurse to see if the next one is or not.
  if (CHAMELEON.test(char)) {
    // Take the whole next character, so that both halves of a surrogate pair
    // are handed to the recursive call rather than just the first code unit.
    const length = getCharLength(remaining.charAt(0))
    const next = remaining.slice(0, length)
    const rest = remaining.slice(length)
    if (isWord(next, rest)) return true
  }

  // Chameleon characters are themselves listed in PUNCTUATION, so one that is
  // not followed by a word character is rejected here.
  if (PUNCTUATION.test(char)) return false
  return true
}
/**
 * Get the length of a `character`: 2 when its first code unit is a surrogate,
 * 1 otherwise.
 *
 * @param {String} char
 * @return {Number}
 */
function getCharLength(char) {
  const firstUnit = char.charCodeAt(0)

  if (isSurrogate(firstUnit)) {
    return 2
  }

  return 1
}
/**
 * Get the offset to the end of the first character in `text`, which is the
 * length of that first character.
 *
 * @param {String} text
 * @return {Number}
 */
function getCharOffset(text) {
  return getCharLength(text.charAt(0))
}
/**
 * Get the offset to the end of the character before an `offset` in `text`.
 *
 * The text up to `offset` is reversed so that the character just before
 * `offset` becomes the first one, and is then measured like any leading
 * character. NOTE(review): this presumes `reverse` (from `esrever`) keeps each
 * surrogate pair in its original order; confirm against the esrever docs.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */
function getCharOffsetBackward(text, offset) {
  const before = text.slice(0, offset)
  const reversed = reverse(before)
  return getCharOffset(reversed)
}
/**
 * Get the offset to the end of the character after an `offset` in `text`,
 * measured from `offset`.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */
function getCharOffsetForward(text, offset) {
  const after = text.slice(offset)
  return getCharOffset(after)
}
/**
 * Get the length of a `string` in characters, where a character that starts
 * with a surrogate code unit spans two code units but is counted once.
 *
 * @param {String} string
 * @return {Number}
 */
function getLength(string) {
  let length = 0
  let i = 0

  while (i < string.length) {
    // Re-read the character on every step, so that the stride always matches
    // the character under the cursor rather than the first one in the string.
    i += getCharLength(string.charAt(i))
    length++
  }

  return length
}
/**
 * Get the offset to the end of the first word in `text`.
 *
 * Any non-word characters (whitespace, punctuation) that come before the word
 * are skipped over and included in the offset. The scan stops at the first
 * non-word character after the word has started, or at the end of `text`.
 *
 * The offset is expressed in UTF-16 code units, matching `getCharOffset`, so
 * that it stays a valid string index when `text` contains surrogate pairs.
 *
 * @param {String} text
 * @return {Number}
 */
function getWordOffset(text) {
  let offset = 0
  let started = false

  while (offset < text.length) {
    const size = getCharLength(text.charAt(offset))
    // Hand the whole character (both halves of a surrogate pair) to `isWord`.
    const char = text.slice(offset, offset + size)
    const rest = text.slice(offset + size)

    if (isWord(char, rest)) {
      started = true
    } else if (started) {
      // The first non-word character after the word ends the scan.
      break
    }

    // Advance by the character's size in code units, not by one, so that the
    // result never lands in the middle of a surrogate pair.
    offset += size
  }

  // A lone surrogate at the very end is measured as two code units; never
  // report an offset past the end of the text.
  return Math.min(offset, text.length)
}
/**
 * Get the offset to the end of the word before an `offset` in `text`.
 *
 * The text up to `offset` is reversed, so that scanning forward through the
 * reversed text walks backward from `offset` through the original.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */
function getWordOffsetBackward(text, offset) {
  const before = text.slice(0, offset)
  const reversed = reverse(before)
  return getWordOffset(reversed)
}
/**
 * Get the offset to the end of the word after an `offset` in `text`, measured
 * from `offset`.
 *
 * @param {String} text
 * @param {Number} offset
 * @return {Number}
 */
function getWordOffsetForward(text, offset) {
  const after = text.slice(offset)
  return getWordOffset(after)
}
/**
 * Export the forward and backward character and word offset helpers as a
 * single default object. Only these four helpers are exported here.
 */
export default {
getCharOffsetForward,
getCharOffsetBackward,
getWordOffsetBackward,
getWordOffsetForward
}