.
/**
* Utility function to convert wiki-like to Markdown format
*
* @package moodlecore
* @copyright Howard Miller, 2005
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
/**#@+
* state defines
*/
define( "STATE_NONE",1 ); // blank line has been detected, so looking for first line on next para
define( "STATE_PARAGRAPH",2 ); // currently processing vanilla paragraph
define( "STATE_BLOCKQUOTE",3 ); // currently processing blockquote section
define( "STATE_PREFORM",4 ); // currently processing preformatted text
define( "STATE_NOTIKI",5 ); // currently processing preformatted / no formatting
/**#@-*/
/**#@+
* list defines
*/
define( "LIST_NONE", 1 ); // no lists active
define( "LIST_UNORDERED", 2 ); // unordered list active
define( "LIST_ORDERED", 3 ); // ordered list active
define( "LIST_DEFINITION", 4 ); // definition list active
/**#@-*/
/**
* @package moodlecore
* @copyright Howard Miller, 2005
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
class WikiToMarkdown {
var $block_state;
var $list_state;
var $list_depth;
var $list_backtrack;
var $output; // output buffer
var $courseid;
function close_block( $state ) {
// provide appropriate closure for block according to state
// if in list close this first
$lclose = "";
if ($this->list_state != LIST_NONE) {
$lclose = $this->do_list( " ",true );
}
$sclose = "";
switch ($state) {
case STATE_PARAGRAPH:
$sclose = "\n";
break;
case STATE_BLOCKQUOTE:
$sclose = "\n";
break;
case STATE_PREFORM:
$sclose = "\n";
break;
case STATE_NOTIKI:
$sclose = "\n";
break;
}
return $lclose . $sclose;
}
function do_replace( $line, $mark, $tag ) {
// do the regex thingy for things like bold, italic etc
// $mark is the magic character, and $tag the HTML tag to insert
// BODGE: replace inline $mark characters in places where we want them ignored
// they will be put back after main substitutue, stops problems with eg, and/or
$bodge = chr(1);
$line = eregi_replace( '([[:alnum:]])'.$mark.'([[:alnum:]])', '\\1'.$bodge.'\\2',$line );
$regex = '(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)';
$replace = '\\1<'.$tag.'>\\2'.$tag.'>\\3';
$line = eregi_replace( $regex, $replace, $line );
// BODGE: back we go
$line = eregi_replace( $bodge, $mark, $line );
return $line;
}
function do_replace_markdown( $line, $mark, $tag ) {
// do the regex thingy for things like bold, italic etc
// $mark is the magic character, and $tag the HTML tag to insert
// MARKDOWN version does not generate HTML tags, just straigt replace
// BODGE: replace inline $mark characters in places where we want them ignored
// they will be put back after main substitutue, stops problems with eg, and/or
$bodge = chr(1);
$line = eregi_replace( '([[:alnum:]])'.$mark.'([[:alnum:]])', '\\1'.$bodge.'\\2',$line );
$regex = '(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)';
$replace = '\\1'.$tag.'\\2'.$tag.'\\3';
$line = eregi_replace( $regex, $replace, $line );
// BODGE: back we go
$line = eregi_replace( $bodge, $mark, $line );
return $line;
}
function do_replace_sub( $line, $mark, $tag ) {
// do regex for subscript and superscript (slightly different)
// $mark is the magic character and $tag the HTML tag to insert
$regex = $mark.'([^'.$mark.']*)'.$mark;
$replace = '<'.$tag.'>\\1'.$tag.'>';
return eregi_replace( $regex, $replace, $line );
}
function do_list( $line, $blank=false ) {
// handle line with list character on it
// if blank line implies drop to level 0
// get magic character and then delete it from the line if not blank
if ($blank) {
$listchar="";
$count = 0;
}
else {
$listchar = $line{0};
$count = strspn( $line, $listchar );
$line = eregi_replace( "^[".$listchar."]+ ", "", $line );
}
// find what sort of list this character represents
$list_tag = "";
$list_close_tag = "";
$item_tag = "";
$item_close_tag = "";
$list_style = LIST_NONE;
switch ($listchar) {
case '*':
$list_tag = "";
$list_close_tag = "";
$item_tag = "*";
$item_close_tag = "";
$list_style = LIST_UNORDERED;
break;
case '#':
$list_tag = "";
$list_close_tag = "";
$item_tag = "1.";
$item_close_tag = "";
$list_style = LIST_ORDERED;
break;
case ';':
$list_tag = "
";
$list_close_tag = "
";
$item_tag = "";
$item_close_tag = "";
$list_style = LIST_DEFINITION;
break;
case ':':
$list_tag = "";
$list_close_tag = "
";
$item_tag = "";
$item_close_tag = "";
$list_style = LIST_DEFINITION;
break;
}
// tag opening/closing regime now - fun bit :-)
$tags = "";
// if depth has reduced do number of closes to restore level
for ($i=$this->list_depth; $i>$count; $i-- ) {
$close_tag = array_pop( $this->list_backtrack );
$tags = $tags . $close_tag;
}
// if depth has increased do number of opens to balance
for ($i=$this->list_depth; $i<$count; $i++ ) {
array_push( $this->list_backtrack, "$list_close_tag" );
$tags = $tags . "$list_tag";
}
// ok, so list state is now same as style and depth same as count
$this->list_state = $list_style;
$this->list_depth = $count;
// get indent
$indent = substr( " ",1,$count-1 );
if ($blank) {
$newline = $tags;
}
else {
$newline = $tags . $indent . "$item_tag " . $line . "$item_close_tag";
}
return $newline;
}
function line_replace( $line ) {
// return line after various formatting replacements
// have been made - order is vital to stop them interfering with each other
global $CFG;
// ---- (at least) means a
// MARKDOWN: no change so leave
// is this a list line (starts with * # ; :)
if (eregi( "^([*]+|[#]+|[;]+|[:]+) ", $line )) {
$line = $this->do_list( $line );
}
// typographic conventions
// MARKDOWN: no equiv. so convert to entity as before
// $line = str_replace( "--", "—", $line );
// $line = str_replace( " - ", " – ", $line );
$line = str_replace( "...", " … ", $line );
$line = str_replace( "(R)", "®", $line );
$line = str_replace( "(r)", "®", $line );
$line = str_replace( "(TM)", "™", $line );
$line = str_replace( "(tm)", "™", $line );
$line = str_replace( "(C)", "©", $line );
$line = str_replace( "1/4", "¼", $line );
$line = str_replace( "1/2", "½", $line );
$line = str_replace( "3/4", "¾", $line );
$line = eregi_replace( "([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)", "\\1×\\2", $line ); // (digits) x (digits) - multiply
// do formatting tags
// NOTE: The / replacement *has* to be first, or it will screw the
// HTML tags that are added by the other ones
// MARKDOWN: only bold and italic change, rest are just HTML
$line = $this->do_replace_markdown( $line, "\*", "**" );
$line = $this->do_replace_markdown( $line, "/", "*" );
$line = $this->do_replace( $line, "\+", "ins" );
// $line = $this->do_replace( $line, "-", "del" );
$line = $this->do_replace_sub( $line, "~", "sub" );
$line = $this->do_replace_sub( $line, "\^", "sup" );
$line = $this->do_replace( $line, "%", "code" );
$line = $this->do_replace( $line, "@", "cite" );
// convert urls into proper link with optional link text URL(text)
// MARDOWN: HTML conversion should work fine
$line = eregi_replace("([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)",
"\\1[\\5](\\2://\\3\\4)", $line);
$line = eregi_replace("([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)",
"\\1[\\5](http://www.\\2\\3)", $line);
// make urls (with and without httpd) into proper links
$line = eregi_replace("([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])",
"\\1<\\2://\\3\\4>", $line);
$line = eregi_replace("([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])",
"\\1", $line);
// make email addresses into mailtos....
// MARKDOWN doesn't quite support this, so do as html
$line = eregi_replace("([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)",
"\\1\\3", $line);
// !# at the beginning of any lines means a heading
// MARKDOWN: value (1-6) becomes number of hashes
if (eregi( "^!([1-6]) (.*)$", $line, $regs )) {
$depth = substr( $line, 1, 1 );
$out = substr( '##########', 0, $depth);
$line = eregi_replace( "^!([1-6]) (.*)$", "$out \\2", $line );
}
// acronym handing, example HTML(Hypertext Markyp Language)
// MARKDOWN: no equiv. so just leave as HTML
$line = ereg_replace( "([A-Z]+)\(([^)]+)\)", "\\1", $line );
// Replace resource link >>##(Description Text)
// MARKDOWN: change to MD web link style
$line = eregi_replace( " ([a-zA-Z]+):([0-9]+)\(([^)]+)\)",
" [\\3](".$CFG->wwwroot."/mod/\\1/view.php?id=\\2) ", $line );
require_once($CFG->libdir.'/filelib.php');
$coursefileurl = get_file_url($this->courseid);
// Replace picture resource link
$line = eregi_replace( "/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)",
"![\\3](".$coursefileurl."/\\1\\2)", $line );
// Replace file resource link
$line = eregi_replace( "file:/([[:alnum:]/._-]+)\(([^)]+)\)",
"[\\2](".$coursefileurl."/\\1)", $line );
return $line;
}
function convert( $content,$courseid ) {
// main entry point for processing Wiki-like text
// $content is string containing text with Wiki-Like formatting
// return: string containing Markdown formatting
// initialisation stuff
$this->output = "";
$this->block_state = STATE_NONE;
$this->list_state = LIST_NONE;
$this->list_depth = 0;
$this->list_backtrack = array();
$this->spelling_on = false;
$this->courseid = $courseid;
// split content into array of single lines
$lines = explode( "\n",$content );
$buffer = "";
// run through lines
foreach( $lines as $line ) {
// is this a blank line?
$blank_line = eregi( "^[[:blank:]\r]*$", $line );
if ($blank_line) {
// first end current block according to state
$buffer = $buffer . $this->close_block( $this->block_state );
$this->block_state = STATE_NONE;
continue;
}
// act now depending on current block state
if ($this->block_state == STATE_NONE) {
// first character of line defines block type
if (eregi( "^> ",$line )) {
// blockquote
$buffer = $buffer . $this->line_replace( $line ). "\n";
$this->block_state = STATE_BLOCKQUOTE;
}
else
if (eregi( "^ ",$line) ) {
// preformatted text
// MARKDOWN: no real equiv. so just use
$buffer = $buffer . "\n";
$buffer = $buffer . $this->line_replace($line) . "\n";
$this->block_state = STATE_PREFORM;
}
else
if (eregi("^\% ",$line) ) {
// preformatted text - no processing
// MARKDOWN: this is MD code form of a paragraph
$buffer = $buffer . " " . eregi_replace( "^\%","",$line) . "\n";
$this->block_state = STATE_NOTIKI;
}
else {
// ordinary paragraph
$buffer = $buffer . $this->line_replace($line) . "\n";
$this->block_state = STATE_PARAGRAPH;
}
continue;
}
if (($this->block_state == STATE_PARAGRAPH) |
($this->block_state == STATE_BLOCKQUOTE) |
($this->block_state == STATE_PREFORM) ) {
$buffer = $buffer . $this->line_replace($line) . "\n";
continue;
}
elseif ($this->block_state == STATE_NOTIKI) {
$buffer = $buffer . " " .$line . "\n";
}
}
// close off any block level tags
$buffer = $buffer . $this->close_block( $this->block_state );
//return $buffer;
return $buffer;
}
}
?>