You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
569 lines
14 KiB
569 lines
14 KiB
<?php
/*

	Copyright (c) 2009-2019 F3::Factory/Bong Cosca, All rights reserved.

	This file is part of the Fat-Free Framework (http://fatfreeframework.com).

	This is free software: you can redistribute it and/or modify it under the
	terms of the GNU General Public License as published by the Free Software
	Foundation, either version 3 of the License, or later.

	Fat-Free Framework is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.

	You should have received a copy of the GNU General Public License along
	with Fat-Free Framework. If not, see <http://www.gnu.org/licenses/>.

*/
//! Markdown-to-HTML converter
//! Block-level text is split by build() using the regexes in $blocks; each
//! matched block is handed to the method named '_' + block key. Inline
//! spans inside a block are converted by scan().
class Markdown extends Prefab {

	protected
		//! Parsing rules (block name => regex), lazily set by build()
		$blocks,
		//! Special characters (search => replacement), lazily set by esc()
		$special;

	/**
	*	Process blockquote: strip one level of '>' markers and render the
	*	remaining text as blocks
	*	@return string
	*	@param $str string
	**/
	protected function _blockquote($str) {
		$str=preg_replace('/(?<=^|\n)\h?>\h?(.*?(?:\n+|$))/','\1',$str);
		return strlen($str)?
			('<blockquote>'.$this->build($str).'</blockquote>'."\n\n"):'';
	}

	/**
	*	Process whitespace-prefixed code block
	*	@return string
	*	@param $str string
	**/
	protected function _pre($str) {
		// Escape first, then drop the four-space/tab prefix of every line
		$str=preg_replace('/(?<=^|\n)(?: {4}|\t)(.+?(?:\n+|$))/','\1',
			$this->esc($str));
		return strlen($str)?
			('<pre><code>'.
				// esc() does not double-encode existing entities
				$this->esc($this->snip($str)).
			'</code></pre>'."\n\n"):
			'';
	}

	/**
	*	Process fenced code block; when the framework's HIGHLIGHT variable
	*	is on, the 'php', 'apache', 'html' and 'ini' hints get syntax
	*	highlighting, anything else is merely escaped
	*	@return string
	*	@param $hint string
	*	@param $str string
	**/
	protected function _fence($hint,$str) {
		$str=$this->snip($str);
		$fw=Base::instance();
		if ($fw->HIGHLIGHT) {
			switch (strtolower($hint)) {
				case 'php':
					// Delegated to the framework; result is used as-is
					$str=$fw->highlight($str);
					break;
				case 'apache':
					$str=$this->fenceApache($str);
					break;
				case 'html':
					$str=$this->fenceHtml($str);
					break;
				case 'ini':
					$str=$this->fenceIni($str);
					break;
				default:
					$str='<code>'.$this->esc($str).'</code>';
					break;
			}
		}
		else
			$str='<code>'.$this->esc($str).'</code>';
		return '<pre>'.$str.'</pre>'."\n\n";
	}

	/**
	*	Highlight Apache configuration: section tags and directives are
	*	wrapped in spans; lines matching neither form are not emitted
	*	@return string
	*	@param $str string
	**/
	protected function fenceApache($str) {
		preg_match_all('/(?<=^|\n)(\h*)'.
			'(?:(<\/?)(\w+)((?:\h+[^>]+)*)(>)|'.
			'(?:(\w+)(\h.+?)))(\h*(?:\n+|$))/',
			$str,$matches,PREG_SET_ORDER);
		$out='';
		foreach ($matches as $match)
			// [1] indent, [2..5] section tag parts, [6..7] directive and
			// its arguments, [8] trailing whitespace/line feeds
			$out.=$match[1].
				($match[3]?
					('<span class="section">'.
						$this->esc($match[2]).$match[3].
					'</span>'.
					($match[4]?
						('<span class="data">'.
							$this->esc($match[4]).
						'</span>'):
						'').
					'<span class="section">'.
						$this->esc($match[5]).
					'</span>'):
					('<span class="directive">'.
						$match[6].
					'</span>'.
					'<span class="data">'.
						$this->esc($match[7]).
					'</span>')).
				$match[8];
		return '<code>'.$out.'</code>';
	}

	/**
	*	Highlight HTML/XML markup: tag names, attribute names and attribute
	*	values are wrapped in spans, text between tags is escaped
	*	@return string
	*	@param $str string
	**/
	protected function fenceHtml($str) {
		preg_match_all(
			'/(?:(?:<(\/?)(\w+)'.
			'((?:\h+(?:\w+\h*=\h*)?".+?"|[^>]+)*|'.
			'\h+.+?)(\h*\/?)>)|(.+?))/s',
			$str,$matches,PREG_SET_ORDER
		);
		$out='';
		foreach ($matches as $match) {
			if ($match[2]) {
				// Tag delimiters must be entities, otherwise the browser
				// parses the displayed tag as real markup
				$out.='<span class="xml_tag">&lt;'.
					$match[1].$match[2].'</span>';
				if ($match[3]) {
					preg_match_all(
						'/(?:\h+(?:(?:(\w+)\h*=\h*)?'.
						'(".+?")|(.+)))/',
						$match[3],$parts,PREG_SET_ORDER
					);
					foreach ($parts as $part)
						// [1] attribute name, [2] quoted value,
						// [3] anything else inside the tag
						$out.=' '.
							(empty($part[3])?
								((empty($part[1])?
									'':
									('<span class="xml_attr">'.
										$part[1].'</span>=')).
								'<span class="xml_data">'.
									$this->esc($part[2]).'</span>'):
								('<span class="xml_tag">'.
									$this->esc($part[3]).'</span>'));
				}
				$out.='<span class="xml_tag">'.
					$match[4].'&gt;</span>';
			}
			else
				$out.=$this->esc($match[5]);
		}
		return '<code>'.$out.'</code>';
	}

	/**
	*	Highlight INI configuration: comments, section headers, keys and
	*	values are escaped and wrapped in spans
	*	@return string
	*	@param $str string
	**/
	protected function fenceIni($str) {
		preg_match_all(
			'/(?<=^|\n)(?:'.
			'(;[^\n]*)|(?:<\?php.+?\?>?)|'.
			'(?:\[(.+?)\])|'.
			'(.+?)(\h*=\h*)'.
			'((?:\\\\\h*\r?\n|.+?)*)'.
			')((?:\r?\n)+|$)/',
			$str,$matches,PREG_SET_ORDER
		);
		$out='';
		foreach ($matches as $match) {
			// [1] comment, [2] section, [3] key, [4] assignment operator,
			// [5] value, [6] trailing line feeds
			$eol=isset($match[6])?$match[6]:'';
			// Compare by length, not truthiness: '0' is a valid
			// section, key or value
			if (strlen($match[1]))
				$out.='<span class="comment">'.$this->esc($match[1]).
					'</span>';
			elseif (strlen($match[2]))
				$out.='<span class="ini_section">['.
					$this->esc($match[2]).']'.
					'</span>';
			elseif (strlen($match[3]))
				$out.='<span class="ini_key">'.$this->esc($match[3]).
					'</span>'.$match[4].
					(strlen($match[5])?
						('<span class="ini_value">'.
							$this->esc($match[5]).'</span>'):'');
			else
				// PHP tag line: the full match already carries the
				// trailing line feeds, so cut them off before adding
				// them back below
				$out.=$this->esc(
					substr($match[0],0,strlen($match[0])-strlen($eol)));
			$out.=$eol;
		}
		return '<code>'.$out.'</code>';
	}

	/**
	*	Process horizontal rule
	*	@return string
	**/
	protected function _hr() {
		return '<hr />'."\n\n";
	}

	/**
	*	Render a heading element whose id is the slug of its text
	*	@return string
	*	@param $level int
	*	@param $str string
	**/
	protected function heading($level,$str) {
		return '<h'.$level.' id="'.Web::instance()->slug($str).'">'.
			$this->scan($str).'</h'.$level.'>'."\n\n";
	}

	/**
	*	Process atx-style heading; the number of '#' characters is the
	*	heading level
	*	@return string
	*	@param $type string
	*	@param $str string
	**/
	protected function _atx($type,$str) {
		return $this->heading(strlen($type),$str);
	}

	/**
	*	Process setext-style heading; '=' underline yields level 1 and
	*	'-' underline yields level 2
	*	@return string
	*	@param $str string
	*	@param $type string
	**/
	protected function _setext($str,$type) {
		return $this->heading(strpos('=-',$type)+1,$str);
	}

	/**
	*	Process ordered/unordered list
	*	@return string
	*	@param $str string
	**/
	protected function _li($str) {
		// Initialize list parser
		$len=strlen($str);
		$ptr=0;
		$dst='';
		$first=TRUE;
		$tight=TRUE;
		$type='ul';
		// Main loop
		while ($ptr<$len) {
			if (preg_match('/^\h*[*-](?:\h?[*-]){2,}(?:\n+|$)/',
				substr($str,$ptr),$match)) {
				$ptr+=strlen($match[0]);
				// Embedded horizontal rule: close the list built so far
				// and render the remainder as ordinary blocks
				return (strlen($dst)?
					('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'').
					'<hr />'."\n\n".$this->build(substr($str,$ptr));
			}
			elseif (preg_match('/(?<=^|\n)([*+-]|\d+\.)\h'.
				'(.+?(?:\n+|$))((?:(?: {4}|\t)+.+?(?:\n+|$))*)/s',
				substr($str,$ptr),$match)) {
				// [1] bullet/number, [2] first line, [3] indented lines
				$match[3]=preg_replace('/(?<=^|\n)(?: {4}|\t)/','',$match[3]);
				// Do the indented lines form a block other than a
				// plain paragraph (last rule)?
				$found=FALSE;
				foreach (array_slice($this->blocks,0,-1) as $regex)
					if (preg_match($regex,$match[3])) {
						$found=TRUE;
						break;
					}
				// List
				if ($first) {
					// First pass: the first item decides list type and
					// tightness for the whole list
					if (is_numeric($match[1]))
						$type='ol';
					if (preg_match('/\n{2,}$/',$match[2].
						($found?'':$match[3])))
						// Loose structure; Use paragraphs
						$tight=FALSE;
					$first=FALSE;
				}
				// Strip leading whitespaces
				$ptr+=strlen($match[0]);
				$tmp=$this->snip($match[2].$match[3]);
				if ($tight) {
					if ($found)
						$tmp=$match[2].$this->build($this->snip($match[3]));
				}
				else
					$tmp=$this->build($tmp);
				$dst.='<li>'.$this->scan(trim($tmp)).'</li>'."\n";
			}
			else
				// Nothing recognizable at the cursor; the cursor cannot
				// advance, so stop instead of looping forever
				break;
		}
		return strlen($dst)?
			('<'.$type.'>'."\n".$dst.'</'.$type.'>'."\n\n"):'';
	}

	/**
	*	Ignore raw HTML
	*	@return string
	*	@param $str string
	**/
	protected function _raw($str) {
		return $str;
	}

	/**
	*	Process paragraph
	*	@return string
	*	@param $str string
	**/
	protected function _p($str) {
		$str=trim($str);
		if (strlen($str)) {
			// A line starting with '>' or '#' ends the paragraph; the
			// rest is rendered as separate blocks
			if (preg_match('/^(.+?\n)([>#].+)$/s',$str,$parts))
				return $this->_p($parts[1]).$this->build($parts[2]);
			$str=preg_replace_callback(
				'/([^<>\[]+)?(<[\?%].+?[\?%]>|<.+?>|\[.+?\]\s*\(.+?\))|'.
				'(.+)/s',
				function($expr) {
					// [1] plain text before a tag/link, [2] the tag or
					// inline link (kept verbatim), [3] remaining text;
					// [1] and [2] are empty strings when only [3] matched
					$tmp='';
					if (isset($expr[1]))
						$tmp.=$this->esc($expr[1]);
					$tmp.=$expr[2];
					if (isset($expr[3]))
						$tmp.=$this->esc($expr[3]);
					return $tmp;
				},
				$str
			);
			// Two whitespace characters before a line feed: hard break
			$str=preg_replace('/\s{2}\r?\n/','<br />',$str);
			return '<p>'.$this->scan($str).'</p>'."\n\n";
		}
		return '';
	}

	/**
	*	Process strong/em/strikethrough spans
	*	@return string
	*	@param $str string
	**/
	protected function _text($str) {
		$tmp='';
		// Repeat until a pass changes nothing, so nested spans resolve
		while ($str!=$tmp)
			$str=preg_replace_callback(
				'/(?<=\s|^)(?<!\\\\)([*_])([*_]?)([*_]?)(.*?)(?!\\\\)\3\2\1(?=[\s[:punct:]]|$)/',
				function($expr) {
					// Number of marker characters selects the element
					if ($expr[3])
						return '<strong><em>'.$expr[4].'</em></strong>';
					if ($expr[2])
						return '<strong>'.$expr[4].'</strong>';
					return '<em>'.$expr[4].'</em>';
				},
				preg_replace(
					'/(?<!\\\\)~~(.*?)(?!\\\\)~~(?=[\s[:punct:]]|$)/',
					'<del>\1</del>',
					$tmp=$str
				)
			);
		return $str;
	}

	/**
	*	Process image span
	*	@return string
	*	@param $str string
	**/
	protected function _img($str) {
		return preg_replace_callback(
			'/!(?:\[(.+?)\])?\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
			function($expr) {
				// [1] alt text, [2] URL, [3] title (absent if not given).
				// The URL is escaped like every other attribute value;
				// length checks keep a literal '0' alt/title
				return '<img src="'.$this->esc($expr[2]).'"'.
					(strlen($expr[1])?
						(' alt="'.$this->esc($expr[1]).'"'):
						'').
					((isset($expr[3]) && strlen($expr[3]))?
						(' title="'.$this->esc($expr[3]).'"'):
						'').' />';
			},
			$str
		);
	}

	/**
	*	Process anchor span
	*	@return string
	*	@param $str string
	**/
	protected function _a($str) {
		return preg_replace_callback(
			'/(?<!\\\\)\[(.+?)(?!\\\\)\]\h*\(<?(.*?)>?(?:\h*"(.*?)"\h*)?\)/',
			function($expr) {
				// [1] link text, [2] URL, [3] title (absent if not given)
				return '<a href="'.$this->esc($expr[2]).'"'.
					((isset($expr[3]) && strlen($expr[3]))?
						(' title="'.$this->esc($expr[3]).'"'):
						'').
					'>'.$this->scan($expr[1]).'</a>';
			},
			$str
		);
	}

	/**
	*	Auto-convert links
	*	@return string
	*	@param $str string
	**/
	protected function _auto($str) {
		return preg_replace_callback(
			'/`.*?<(.+?)>.*?`|<(.+?)>/',
			function($expr) {
				// [1] is set for <...> inside backticks (left untouched);
				// [2] becomes a link only if it parses as a URL with a
				// scheme
				if (empty($expr[1]) && parse_url($expr[2],PHP_URL_SCHEME)) {
					$expr[2]=$this->esc($expr[2]);
					return '<a href="'.$expr[2].'">'.$expr[2].'</a>';
				}
				return $expr[0];
			},
			$str
		);
	}

	/**
	*	Process code span
	*	@return string
	*	@param $str string
	**/
	protected function _code($str) {
		return preg_replace_callback(
			'/`` (.+?) ``|(?<!\\\\)`(.+?)(?!\\\\)`/',
			function($expr) {
				// [2] exists only for the single-backtick form; testing
				// with isset() keeps a double-backtick span holding '0'
				return '<code>'.
					$this->esc(isset($expr[2])?$expr[2]:$expr[1]).'</code>';
			},
			$str
		);
	}

	/**
	*	Convert characters to HTML entities; also substitutes the
	*	sequences listed in $special (case-insensitive). Existing
	*	entities are not encoded a second time
	*	@return string
	*	@param $str string
	**/
	public function esc($str) {
		if (!$this->special)
			$this->special=[
				'...'=>'…',
				'(tm)'=>'™',
				'(r)'=>'®',
				'(c)'=>'©'
			];
		// Pairs are applied one after another, in array order
		$str=str_ireplace(
			array_keys($this->special),
			array_values($this->special),
			$str
		);
		return htmlspecialchars($str,ENT_COMPAT,
			Base::instance()->ENCODING,FALSE);
	}

	/**
	*	Reduce multiple line feeds: a run of line feeds inside the text
	*	keeps one blank line at most, a trailing run becomes a single
	*	line feed
	*	@return string
	*	@param $str string
	**/
	protected function snip($str) {
		return preg_replace('/(?:(?<=\n)\n+)|\n+$/',"\n",$str);
	}

	/**
	*	Scan line for convertible spans; converters run in the order
	*	image, anchor, emphasis, auto-link, code
	*	@return string
	*	@param $str string
	**/
	public function scan($str) {
		$inline=['img','a','text','auto','code'];
		foreach ($inline as $func)
			$str=$this->{'_'.$func}($str);
		return $str;
	}

	/**
	*	Assemble blocks
	*	@return string
	*	@param $str string
	**/
	protected function build($str) {
		if (!$this->blocks) {
			// Regexes for capturing entire blocks; tried in this order,
			// with the catch-all paragraph rule last
			$this->blocks=[
				'blockquote'=>'/^(?:\h?>\h?.*?(?:\n+|$))+/',
				'pre'=>'/^(?:(?: {4}|\t).+?(?:\n+|$))+/',
				'fence'=>'/^`{3}\h*(\w+)?.*?[^\n]*\n+(.+?)`{3}[^\n]*'.
					'(?:\n+|$)/s',
				'hr'=>'/^\h*[*_-](?:\h?[\*_-]){2,}\h*(?:\n+|$)/',
				'atx'=>'/^\h*(#{1,6})\h?(.+?)\h*(?:#.*)?(?:\n+|$)/',
				'setext'=>'/^\h*(.+?)\h*\n([=-])+\h*(?:\n+|$)/',
				'li'=>'/^(?:(?:[*+-]|\d+\.)\h.+?(?:\n+|$)'.
					'(?:(?: {4}|\t)+.+?(?:\n+|$))*)+/s',
				'raw'=>'/^((?:<!--.+?-->|'.
					'<(address|article|aside|audio|blockquote|canvas|dd|'.
					'div|dl|fieldset|figcaption|figure|footer|form|h\d|'.
					'header|hgroup|hr|noscript|object|ol|output|p|pre|'.
					'section|table|tfoot|ul|video).*?'.
					'(?:\/>|>(?:(?>[^><]+)|(?R))*<\/\2>))'.
					'\h*(?:\n{2,}|\n*$)|<[\?%].+?[\?%]>\h*(?:\n?$|\n*))/s',
				'p'=>'/^(.+?(?:\n{2,}|\n*$))/s'
			];
		}
		// Treat lines with nothing but whitespaces as empty lines
		$str=preg_replace('/\n\h+(?=\n)/',"\n",$str);
		// Initialize block parser
		$len=strlen($str);
		$ptr=0;
		$dst='';
		// Main loop
		while ($ptr<$len) {
			$start=$ptr;
			// Unparsed remainder; computed once per iteration
			$rest=substr($str,$ptr);
			if (preg_match('/^ {0,3}\[([^\[\]]+)\]:\s*<?(.*?)>?\s*'.
				'(?:"([^\n]*)")?(?:\n+|$)/s',$rest,$match)) {
				// Reference-style link; Backtrack
				// $match: [1] label, [2] URL, [3] title (optional)
				$ptr+=strlen($match[0]);
				$tmp='';
				// Catch line breaks in title attribute
				$ref=preg_replace('/\h/','\s',preg_quote($match[1],'/'));
				// Rewrite references in the output generated so far
				// until a pass leaves it unchanged
				while ($dst!=$tmp) {
					$dst=preg_replace_callback(
						'/(?<!\\\\)\[('.$ref.')(?!\\\\)\]\s*\[\]|'.
						'(!?)(?:\[([^\[\]]+)\]\s*)?'.
						'(?<!\\\\)\[('.$ref.')(?!\\\\)\]/',
						function($expr) use($match) {
							// $expr: [1] label of "[label][]",
							// [2] '!' for images, [3] explicit text,
							// [4] label of "[text][label]"/"[label]"
							if (empty($expr[2]))
								// Anchor
								return '<a href="'.$this->esc($match[2]).'"'.
									(empty($match[3])?
										'':
										(' title="'.
											$this->esc($match[3]).'"')).'>'.
									// Link
									$this->scan(
										empty($expr[3])?
											(empty($expr[1])?
												$expr[4]:
												$expr[1]):
											$expr[3]
									).'</a>';
							// Image; alt text falls back to the
							// reference label, as link text does above
							return '<img src="'.$this->esc($match[2]).'"'.
								' alt="'.
									$this->esc(
										empty($expr[3])?$expr[4]:$expr[3]
									).'"'.
								(empty($match[3])?
									'':
									(' title="'.
										$this->esc($match[3]).'"')).
								' />';
						},
						$tmp=$dst
					);
				}
			}
			else
				foreach ($this->blocks as $func=>$regex)
					if (preg_match($regex,$rest,$match)) {
						$ptr+=strlen($match[0]);
						// Handlers receive the capture groups, or the
						// whole match when the rule captured nothing
						$dst.=call_user_func_array(
							[$this,'_'.$func],
							count($match)>1?array_slice($match,1):$match
						);
						break;
					}
			if ($ptr===$start)
				// No rule consumed any input (e.g. a regex engine
				// failure); stop instead of looping forever
				break;
		}
		return $dst;
	}

	/**
	*	Render HTML equivalent of markdown
	*	@return string
	*	@param $txt string
	**/
	public function convert($txt) {
		$txt=preg_replace_callback(
			'/(<code.*?>.+?<\/code>|'.
			'<[^>\n]+>|\([^\n\)]+\)|"[^"\n]+")|'.
			'\\\\(.)/s',
			function($expr) {
				// Process escaped characters: [1] (code elements, tags,
				// parenthesized and double-quoted text) is kept as-is;
				// elsewhere a backslash escape yields the bare character
				return empty($expr[1])?$expr[2]:$expr[1];
			},
			// Line endings are normalized to LF before parsing
			$this->build(preg_replace('/\r\n|\r/',"\n",$txt))
		);
		return $this->snip($txt);
	}

}