1
0
mirror of https://github.com/Pomax/BezierInfo-2.git synced 2025-08-05 16:27:45 +02:00
Files
BezierInfo-2/tools/lib/chunk.js
Mike Kamermans 2aa657fe9e Update chunk.js
2017-04-14 08:29:58 -07:00

366 lines
12 KiB
JavaScript

/**
*
* This file defines all the chunking functions that can be used to take
* a source markdown file, and then chop it up into blocks that should
* either be converted as markdown, or should be left alone.
*
* Getting a final HTML file using this entirely not-optimized solution
* simply involves calling:
*
* const convert = require(some markdown converter)
* const chunk = require(this file)
* const html = chunk(markdown).map(c => c.convert ? convert(c.data) : c.data).join('\n')
*
* And you're done.
*
*/
// fix the stupid nonsense inability for markdown parsers to see link
// syntax with `)` in the links themselves
function fixMarkDownLinks(data, chunks, chunkMore) {
var next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
fixes = [];
data.replace(/\[[^\]]+\]\(/g, function(_match, pos, _fullstring) {
// this is the start of a link. Find the offset at which the next `)`
// is actually the link closer.
var offset = 0;
var start = pos + _match.length;
var complex = false;
for (let d=0, i=start; i<data.length; i++) {
if (data[i] === '(') { d++; complex = true; }
else if (data[i] === ')') { d--; }
if (d<0) { offset = i - start; break; }
}
var end = start + offset;
// we now know the *actual* link length. Safify it.
if (complex) { fixes.push({ start, end, data: data.substring(start,end) }); }
// and return the matched text because we don't want to replace right now.
return _match
});
// let's safify this data, if there was a complex pattern that needs fixin'
if (fixes.length>0) {
fixes.forEach(fix => {
let s = fix.start,
e = fix.end,
newdata = fix.data.replace(/\(/g, '%28').replace(/\)/g, '%29');
// I can't believe I still have to do this in 2017...
data = data.substring(0,s) + newdata + data.substring(e);
});
}
// alright, let "the rest" deal with this data now.
performChunking(data, chunks, next, otherChunkers);
}
/**
* ...
*/
function chunkStyleTags(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
styleTag = '<style>',
styleTagEnd = '</style>';
while (p !== -1) {
// Let's check a BSplineGraphic tag
let style = data.indexOf(styleTag, p);
if (style === -1) {
// No <BSplineGraphic/> block found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-<BSplineGraphic/> data for whatever else might be in there.
performChunking(data.substring(p, style), chunks, next, otherChunkers);
let tail = data.substring(style), eol, styledata;
// Then capture the <BSplineGraphic>...</BSplineGraphic> or <BSplineGraphic .../> block and mark it as "don't convert".
eol = data.indexOf(styleTagEnd, style) + styleTagEnd.length;
styledata = data.substring(style, eol);
styledata = styledata.replace(/([{}])/g,"{'$1'}");
chunks.push({ convert: false, type: "style", s:style, e:eol, data: styledata });
p = eol;
}
}
/**
* ...
*/
function chunkScriptTags(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
scriptTag = '<script',
scriptTagEnd = '</script>';
while (p !== -1) {
// Let's check a BSplineGraphic tag
let script = data.indexOf(scriptTag, p);
if (script === -1) {
// No <BSplineGraphic/> block found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-<BSplineGraphic/> data for whatever else might be in there.
performChunking(data.substring(p, script), chunks, next, otherChunkers);
let tail = data.substring(script), eol, scriptdata;
// Then capture the <BSplineGraphic>...</BSplineGraphic> or <BSplineGraphic .../> block and mark it as "don't convert".
eol = data.indexOf(scriptTagEnd, script) + scriptTagEnd.length;
scriptdata = data.substring(script, eol);
scriptdata.replace(/\/\*[\w\s]+\*\//g,'');
chunks.push({ convert: false, type: "script", s:script, e:eol, data:scriptdata });
p = eol;
}
}
/**
* ...
*/
function chunkBSplineGraphicsJSX(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
bgfxTag = '<BSplineGraphic',
bgfxEnd = '/>',
bgfxEnd2 = '</BSplineGraphic>';
while (p !== -1) {
// Let's check a BSplineGraphic tag
let bgfx = data.indexOf(bgfxTag, p);
if (bgfx === -1) {
// No <BSplineGraphic/> block found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-<BSplineGraphic/> data for whatever else might be in there.
performChunking(data.substring(p, bgfx), chunks, next, otherChunkers);
let tail = data.substring(bgfx),
noContent = !!tail.match(/^<BSplineGraphic[^>]+\/>/),
eol;
// Then capture the <BSplineGraphic>...</BSplineGraphic> or <BSplineGraphic .../> block and mark it as "don't convert".
if (noContent) {
eol = data.indexOf(bgfxEnd, bgfx) + bgfxEnd.length;
} else {
eol = data.indexOf(bgfxEnd2, bgfx) + bgfxEnd2.length;
}
chunks.push({ convert: false, type: "bgfx", s:bgfx, e:eol, data: data.substring(bgfx, eol) });
p = eol;
}
}
/**
* ...
*/
function chunkGraphicJSX(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
gfxTag = '<Graphic',
gfxEnd = '/>',
gfxEnd2 = '</Graphic>';
while (p !== -1) {
// Let's check a Graphic tag
let gfx = data.indexOf(gfxTag, p);
if (gfx === -1) {
// No <Graphic/> block found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-<Graphic/> data for whatever else might be in there.
performChunking(data.substring(p, gfx), chunks, next, otherChunkers);
let tail = data.substring(gfx),
noContent = !!tail.match(/^<Graphic[^>]+\/>/),
eol;
// Then capture the <Graphic>...</Graphic> or <Graphic .../> block and mark it as "don't convert".
if (noContent) {
eol = data.indexOf(gfxEnd, gfx) + gfxEnd.length;
} else {
eol = data.indexOf(gfxEnd2, gfx) + gfxEnd2.length;
}
chunks.push({ convert: false, type: "gfx", s:gfx, e:eol, data: data.substring(gfx, eol) });
p = eol;
}
}
/**
* ...
*/
function chunkDivEnds(data, chunks, chunkMore) {
var next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false;
var splt = data.split('</div>');
var dlen = splt.length;
splt.forEach( function(segment, pos) {
performChunking(segment, chunks, next, otherChunkers);
if (pos < dlen-1) {
chunks.push({ convert: false, type: '</div>', s:-1, e:-1, data: '</div>' });
}
});
}
/**
* ...
*/
function chunkTable(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
tableMatch = '\n<table',
tableClosingTag = '</table>\n';
while (p !== -1) {
// Let's check for a <table> tag
let table = data.indexOf(tableMatch, p);
if (table === -1) {
// No tables found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-table data for whatever else might be in there.
performChunking(data.substring(p, table), chunks, next, otherChunkers);
// then mark the table code as no-convert
let eod = data.indexOf(tableClosingTag, table) + tableClosingTag.length;
chunks.push({ convert: false, type: "table", s:table, e:eod, data: data.substring(table, eod) });
p = eod;
}
}
/**
* ...
*/
function chunkDivs(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
divMatch = '\n<div className="',
divEnd = '">\n',
divClosingTag = '</div>\n';
while (p !== -1) {
// Let's check for a <div className="..."> tag
let div = data.indexOf(divMatch, p);
if (div === -1) {
// No div tags found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-div data for whatever else might be in there.
performChunking(data.substring(p, div), chunks, next, otherChunkers);
// Now, if we have a div, there's a few options:
//
// - "figure" contains HTML content, not to be converted
// - "note" contains markdown content, to be converted
// - "howtocode" contains markdown content, to be converted
let className = data.substring(div).match(/className="([^"]+)"/);
if (className !== null) { className = className[1]; }
let eod, type="div";
if (className === "figure" || className === "two-column") {
eod = data.indexOf(divClosingTag, div) + divClosingTag.length;
type += "." + className;
} else {
eod = data.indexOf(divEnd, div) + divEnd.length;
}
chunks.push({ convert: false, type: type, s:div, e:eod, data: data.substring(div, eod) });
p = eod;
}
}
/**
* Split data up into "latex" and "not latex".
* Anything that is not latex might still be "not markdown"
* though, so we hand that data off to additional chunkers
*/
function chunkLatex(data, chunks, chunkMore) {
var p = 0,
next = chunkMore ? chunkMore[0] : false,
otherChunkers = chunkMore ? chunkMore.slice(1) : false,
latexEnd = '\\]';
while (p !== -1) {
// Let's check a LaTeX block
let latex = data.indexOf('\\[', p);
if (latex === -1) {
// No LaTeX block found: we're done here. Parse the remaining
// data for whatever else might be in there.
performChunking(data.substring(p), chunks, next, otherChunkers);
break;
}
// First parse the non-LaTeX data for whatever else might be in there.
performChunking(data.substring(p, latex), chunks, next, otherChunkers);
// Then capture the LaTeX block and mark it as "don't convert"
let eol = data.indexOf(latexEnd, latex) + latexEnd.length;
chunks.push({ convert: false, type: "latex", s:latex, e:eol, data: data.substring(latex, eol) });
p = eol;
}
}
// in-place chunking
function performChunking(data, chunks, chunker, moreChunkers) {
// If there's no further chunking function to run, just
// record this data as a chunk of convertible data.
if (!chunker) {
if (data.trim()!=='') {
chunks.push({ convert: true, data: data });
}
return "early";
}
// otherwise, perform more chunking.
chunker(data, chunks, moreChunkers);
}
/**
* Split data up into "markdown" and "not markdown" parts.
* We'll only run markdown conversion on the markdown parts.
*/
module.exports = function chunk(data) {
var chunks = [];
var chunkers = [
chunkDivs,
chunkDivEnds,
chunkTable,
chunkGraphicJSX,
chunkBSplineGraphicsJSX,
chunkScriptTags,
chunkStyleTags,
fixMarkDownLinks
];
performChunking(data, chunks, chunkLatex,chunkers);
return chunks;
};