/**
 * This file defines all the chunking functions that can be used to take
 * a source markdown file, and then chop it up into blocks that should
 * either be converted as markdown, or should be left alone.
 *
 * Getting a final HTML file using this entirely not-optimized solution
 * simply involves calling:
 *
 *   const convert = require(some markdown converter)
 *   const chunk = require(this file)
 *   const html = chunk(markdown).map(c => c.convert ? convert(c.data) : c.data).join('\n')
 *
 * And you're done.
 *
 * Every chunker has the signature `(data, chunks, chunkMore)`: it appends
 * "don't convert" chunks for the blocks it recognises to `chunks`, and hands
 * all text in between to the first function in `chunkMore` (with the
 * remaining functions as that call's `chunkMore`).
 */

/**
 * Split a chunker list into the chunker to run next and the ones after it.
 *
 * @param {Function[]|false} chunkMore - remaining chunkers, or a falsy value.
 * @returns {{next: Function|false|undefined, otherChunkers: Function[]|false}}
 */
function splitChunkers(chunkMore) {
  return {
    next: chunkMore ? chunkMore[0] : false,
    otherChunkers: chunkMore ? chunkMore.slice(1) : false
  };
}

/**
 * Find the offset just past `closer`, searching from `from`.
 *
 * When the closer is missing the block is treated as running to the end of
 * the data. Returning `-1 + closer.length` (as a bare indexOf sum would)
 * moves the scan position backwards and makes the caller loop forever.
 *
 * @param {string} data
 * @param {string} closer
 * @param {number} from
 * @returns {number} exclusive end offset, always greater than `from`.
 */
function findBlockEnd(data, closer, from) {
  const at = data.indexOf(closer, from);
  return at === -1 ? data.length : at + closer.length;
}

/**
 * Shared scan loop: find every occurrence of `opener`, pass the text before
 * it to the next chunker, and record the block itself as "don't convert".
 *
 * @param {string} data
 * @param {Object[]} chunks - output list, appended to in place.
 * @param {Function[]|false} chunkMore
 * @param {string} opener - literal text that starts a block.
 * @param {function(number): {type: string, end: number, data: (string|undefined)}} describe
 *   called with the block's start offset; returns its type, its exclusive end
 *   offset and, optionally, replacement text to store instead of the raw slice.
 */
function chunkBlocks(data, chunks, chunkMore, opener, describe) {
  const { next, otherChunkers } = splitChunkers(chunkMore);
  let p = 0;

  for (;;) {
    const start = data.indexOf(opener, p);
    if (start === -1) {
      // No (further) block found: the remaining data goes to the other chunkers.
      performChunking(data.substring(p), chunks, next, otherChunkers);
      return;
    }

    // First let the other chunkers look at whatever precedes the block.
    performChunking(data.substring(p, start), chunks, next, otherChunkers);

    const block = describe(start);
    chunks.push({
      convert: false,
      type: block.type,
      s: start,
      e: block.end,
      data: block.data === undefined ? data.substring(start, block.end) : block.data
    });
    p = block.end;
  }
}

/**
 * Shared implementation for JSX elements that may be written either as
 * `<Tag ... />` or as `<Tag ...>...</Tag>`.
 *
 * @param {string} data
 * @param {Object[]} chunks
 * @param {Function[]|false} chunkMore
 * @param {string} tagName - element name, e.g. "Graphic".
 * @param {string} type - value stored in the chunk's `type` field.
 */
function chunkJSXTag(data, chunks, chunkMore, tagName, type) {
  // Not a /g regex, so .test() carries no state between calls.
  const selfClosing = new RegExp(`^<${tagName}[^>]+\\/>`);

  chunkBlocks(data, chunks, chunkMore, `<${tagName}`, start => {
    const noContent = selfClosing.test(data.substring(start));
    const closer = noContent ? '/>' : `</${tagName}>`;
    return { type, end: findBlockEnd(data, closer, start) };
  });
}

/**
 * Work around markdown parsers that cannot handle `)` inside a link target:
 * percent-encode the parentheses of every link target that contains any,
 * then hand the result to the remaining chunkers.
 *
 * The output is rebuilt left to right from the untouched input. Encoding
 * lengthens the text, so splicing into the string being modified would
 * invalidate the offsets of every later link.
 *
 * @param {string} data
 * @param {Object[]} chunks
 * @param {Function[]|false} chunkMore
 */
function fixMarkDownLinks(data, chunks, chunkMore) {
  const { next, otherChunkers } = splitChunkers(chunkMore);
  const linkStart = /\[[^\]]+\]\(/g;
  let fixed = '';
  let cursor = 0;
  let match;

  while ((match = linkStart.exec(data)) !== null) {
    // This is the start of a link. Find the `)` that actually closes it
    // by tracking the nesting depth of parentheses in the target.
    const start = match.index + match[0].length;
    let depth = 0;
    let complex = false;
    let end = -1;
    for (let i = start; i < data.length; i++) {
      const c = data[i];
      if (c === '(') {
        depth++;
        complex = true;
      } else if (c === ')') {
        if (depth === 0) {
          end = i;
          break;
        }
        depth--;
      }
    }

    // Nothing to do for plain targets, unterminated links, or a "link"
    // that sits inside a target we have already rewritten.
    if (!complex || end === -1 || start < cursor) {
      continue;
    }

    const target = data.substring(start, end);
    fixed += data.substring(cursor, start);
    fixed += target.replace(/\(/g, '%28').replace(/\)/g, '%29');
    cursor = end;
  }

  // alright, let "the rest" deal with this data now.
  performChunking(fixed + data.substring(cursor), chunks, next, otherChunkers);
}

/**
 * Mark `<style ...>...</style>` blocks as "don't convert". Curly braces in
 * the block are rewritten to `{'{'}` / `{'}'}`.
 */
function chunkStyleTags(data, chunks, chunkMore) {
  chunkBlocks(data, chunks, chunkMore, '<style', start => {
    const end = findBlockEnd(data, '</style>', start);
    return {
      type: 'style',
      end,
      data: data.substring(start, end).replace(/([{}])/g, "{'$1'}")
    };
  });
}

/**
 * Mark `<script ...>...</script>` blocks as "don't convert", with simple
 * block comments (word characters and whitespace only) removed.
 */
function chunkScriptTags(data, chunks, chunkMore) {
  chunkBlocks(data, chunks, chunkMore, '<script', start => {
    const end = findBlockEnd(data, '</script>', start);
    return {
      type: 'script',
      end,
      // Strings are immutable: the result of replace() has to be kept.
      data: data.substring(start, end).replace(/\/\*[\w\s]+\*\//g, '')
    };
  });
}

/**
 * Mark `<BSplineGraphic .../>` and `<BSplineGraphic>...</BSplineGraphic>`
 * blocks as "don't convert".
 */
function chunkBSplineGraphicsJSX(data, chunks, chunkMore) {
  chunkJSXTag(data, chunks, chunkMore, 'BSplineGraphic', 'bgfx');
}

/**
 * Mark `<Graphic .../>` and `<Graphic>...</Graphic>` blocks as
 * "don't convert".
 */
function chunkGraphicJSX(data, chunks, chunkMore) {
  chunkJSXTag(data, chunks, chunkMore, 'Graphic', 'gfx');
}

/**
 * Split the data on `</div>`: each segment goes to the remaining chunkers,
 * and every closing tag between two segments becomes its own "don't
 * convert" chunk (with `s` and `e` set to -1).
 */
function chunkDivEnds(data, chunks, chunkMore) {
  const { next, otherChunkers } = splitChunkers(chunkMore);
  const closingTag = '</div>';
  const segments = data.split(closingTag);
  const last = segments.length - 1;

  segments.forEach((segment, pos) => {
    performChunking(segment, chunks, next, otherChunkers);
    if (pos < last) {
      chunks.push({ convert: false, type: closingTag, s: -1, e: -1, data: closingTag });
    }
  });
}

/**
 * Mark `<table` ... `</table>` blocks (opening tag at the start of a line,
 * closing tag at the end of one) as "don't convert".
 */
function chunkTable(data, chunks, chunkMore) {
  chunkBlocks(data, chunks, chunkMore, '\n<table', start => ({
    type: 'table',
    end: findBlockEnd(data, '</table>\n', start)
  }));
}

/**
 * Handle `<div` tags that start a line. There are a few options:
 *
 * - "figure" and "two-column" divs contain HTML content: the whole element,
 *   up to and including its `</div>`, is marked "don't convert".
 * - any other div (e.g. "note", "howtocode") contains markdown: only the
 *   opening tag is marked "don't convert"; the content stays available to
 *   the other chunkers.
 */
function chunkDivs(data, chunks, chunkMore) {
  const divEnd = '>\n';
  const divClosingTag = '</div>\n';

  chunkBlocks(data, chunks, chunkMore, '\n<div', start => {
    // Only look for className inside this div's own opening tag, so that a
    // class-less div cannot pick up the class of a later element.
    const tagEnd = findBlockEnd(data, divEnd, start);
    const found = data.substring(start, tagEnd).match(/className="([^"]+)"/);
    const className = found === null ? null : found[1];

    if (className === 'figure' || className === 'two-column') {
      return { type: `div.${className}`, end: findBlockEnd(data, divClosingTag, start) };
    }
    return { type: 'div', end: tagEnd };
  });
}

/**
 * Split data up into "latex" and "not latex".
 * Anything that is not latex might still be "not markdown"
 * though, so we hand that data off to additional chunkers.
 */
function chunkLatex(data, chunks, chunkMore) {
  chunkBlocks(data, chunks, chunkMore, '\\[', start => ({
    type: 'latex',
    end: findBlockEnd(data, '\\]', start)
  }));
}

/**
 * In-place chunking: run `chunker` on `data`, or record `data` as a chunk
 * of convertible markdown when there is no chunker left.
 *
 * @param {string} data
 * @param {Object[]} chunks - output list, appended to in place.
 * @param {Function|false|undefined} chunker
 * @param {Function[]|false} moreChunkers
 * @returns {string|undefined} "early" when no chunker was run.
 */
function performChunking(data, chunks, chunker, moreChunkers) {
  // If there's no further chunking function to run, just
  // record this data as a chunk of convertible data.
  if (!chunker) {
    if (data.trim() !== '') {
      chunks.push({ convert: true, data: data });
    }
    return "early";
  }

  // otherwise, perform more chunking.
  chunker(data, chunks, moreChunkers);
}

/**
 * Split data up into "markdown" and "not markdown" parts.
 * We'll only run markdown conversion on the markdown parts.
 *
 * @param {string} data - the markdown source.
 * @returns {Object[]} chunks in document order: `{convert: true, data}` for
 *   markdown, `{convert: false, type, s, e, data}` for everything else.
 */
function chunk(data) {
  const chunks = [];
  const chunkers = [
    chunkDivs,
    chunkDivEnds,
    chunkTable,
    chunkGraphicJSX,
    chunkBSplineGraphicsJSX,
    chunkScriptTags,
    chunkStyleTags,
    fixMarkDownLinks
  ];

  performChunking(data, chunks, chunkLatex, chunkers);
  return chunks;
}

// Guarded so the file can also be evaluated where `module` does not exist.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = chunk;
}