<?php if (!defined('PmWiki')) exit();

/*	=== ConvertHTML ===
 *	Copyright 2008-2010 Eemeli Aro <eemeli@gmail.com>
 *
 *	A set of replace-on-edit rules for automatically converting
 *	HTML to PmWiki markup
 *
 *	Developed and tested using the PmWiki 2.2 series.
 *
 *	To install, add the following line to your configuration file :
		include_once("$FarmD/cookbook/convert-html.php");
 *
 *	For more information, please see the online documentation at
 *		http://www.pmwiki.org/wiki/Cookbook/ConvertHTML
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License,
 *	Version 2, as published by the Free Software Foundation.
 *	http://www.gnu.org/copyleft/gpl.html
 */

$RecipeInfo['ConvertHTML']['Version'] = '2011-02-16';

SDVA($ROEPatterns, array(
	'#\[([=@]).*?\1\]#es' => "Keep(stripslashes('\$0'), 'H')",
	'#[ \t]*</?(?:html|head|body)>(\n?)\n*#i' => '$1',
	'#<title>\s*(.*?)\s*</title>\n*#is' => "(: title $1:)\n",
	'#<meta name=([\'"])(keywords|description)\1 content=([\'"])(.*?)\3 */>\n*#i' => "(:$2 $4:)\n",
	'#<!--(.*?)-->#' => "%comment% $1 %%",
	'#<(ul|ol|dl)\s+([^>]+)>\s*(<(li|dt)\b[^>]*>)#i' => '<$1>$3%apply=list $2% ',
	'#<(li|dt)\s+([^>]+)>#i' => "<$1>%apply=item $2% ",
	'#\s*(<(ul|ol)\s*>\s*<li.*</\2>)\n?#ise' => 'ConvertHtmlList(stripslashes("$1"))',
	'#\s*<dt\s*>(.*?)(?:</dt>)?\s*<dd\s*>\s*(.*?)(?:</dd>)?\n#is' => "\n:$1:$2\n",
	'#\s*</?dl\s*>#i' => '',
	'#\s*<table( [^>]*)?>(.*?)\s*</table>#is' => "\n(:table$1:)$2\n(:tableend:)",
	'#\s*<td( [^>]*)?>(.*?)</td>#is' => "\n(:cell$1:)$2",
	'#\s*<tr( [^>]*)?>\s*\(:cell\b(.*?)</tr>#is' => "\n(:cellnr$1$2",
	'#<(p|h\d)\s+([^>]+)>#i' => "<$1>%block $2% ",
	'#\s*<h(\d)\s*>(.*?)</h\1>\n*#ise' => "\"\n\n\".str_repeat('!','$1').' '.stripslashes('$2').\"\n\"",
	'#\s*<p\s*>\s*(.*?)</p>\n?#is' => "\n\n$1\n",
	'#\s*<p\s*>\s*#is' => "\n\n",
	'#\s*<div( [^>]*)?>\s*(.*?)\n?</div>\n*#is' => "\n(:div$1:)\n$2\n(:divend:)\n",
	'#<span\b\s*([^>]*)>(.*?)</span>#ise' => '"%".ConvertHtmlSpan(stripslashes("$1"))."% $2 %%"',
	'#\s*<blockquote>\s*(.*?)</blockquote>\n*#is' => "\n->$1\n",
	'#<br\s+clear=[\'"]?(all|left|right)[\'"]?\s*/?>\n*#i' => "[[<<]]\n",
	'#<br */?>\n*#i' => "\\\\\\\n",
	'#\s*<hr */?>\n*#i' => "\n----\n",
	'#</?(i|em)>#i' => "''",
	'#</?(b|strong)>#i' => "'''",
	'#</?(code|tt)>#i' => "@@",
	'#<pre>(.*?)</pre>#is' => "[@$1@]",
	'#<big>(.*?)</big>#is' => "'+$1+'",
	'#<small>(.*?)</small>#is' => "'-$1-'",
	'#<sup>(.*?)</sup>#is' => "'^$1^'",
	'#<sub>(.*?)</sub>#is' => "'_$1_'",
	'#<(ins|u)>(.*?)</\1>#is' => "{+$2+}",
	'#<del>(.*?)</del>#is' => "{-$1-}",
	'#(<(?:a|img)\b[^>]+\b(href|src)=)([\'"])([./][^\'"]*?)\3#i' => "$1$3Path:$4$3",
	'#(<(?:a|img)\b[^>]+\b(href|src)=)([\'"])([^/:\'"]+\.[^/:\'"]+?)\3#i' => "$1$3Attach:$4$3",
	'#<a\s[^>]*\bname=([\'"])([^\'"]*?)\1[^>]*>(.*?)</a>#ise' => '"[[#".preg_replace("/\s+/","_",stripslashes("$2")).\']] $3\'',
	'#<a\s+([^>]*)>(.*?)</a>#ise' => 'ConvertHtmlLink(stripslashes("$1"), stripslashes("$2"))',
	'#<img\s([^>]*)\bsrc=([\'"])([^\'"]*?)\2\s([^>]*?)\s*(?:/?|></img)>\n?#i' => "%apply=img $1$4%$3%%\n",
	'#(.*%apply=img\b[^%]+)\balign=([\'"]?)(l|r)(?:eft|ight)\2([^%]*%[^%]+)%%#i' => '%$3float% $1$4%%',
	'#(%apply=img\b[^%]+)\b(?:alt|title)=([\'"])([^\'"]+?)\2([^%]*%[^%]+)%%#i' => '$1$4"$3"%%',
	'#%apply=img\b(?:\s+(?:alt|title)=([\'"])\s*\1)*\s*%([^%]+)%%#' => '$2',
	'#<form\s*([^>]*)>#i' => '(:input form $1:)',
	'#</form>#i' => '(:input end:)',
	'#<input\b([^>]*)\stype=([\'"]?)(\w+)\2([^>]*?)/?>#i' => '(:input $3$1$4:)',
	'#<textarea\b([^>]*)>\s*(.*?)\s*</textarea>#i' => '(:input textarea$1 value=\'$2\':)',
	'#<select\b([^>]*)>(.*?)</select>#ise' => 'preg_replace("!<option\b([^>]*)>\s*(.*?)\s*</option>!is","(:input select$1\$1 label=\"\$2\":)","$2")',
	"#$KeepToken(\d.*?H)$KeepToken#e" => "\$GLOBALS['KPV']['$1']"
));

function ConvertHtmlLink($param, $txt) {
	$opt = array_change_key_case(ParseArgs($param), CASE_LOWER);
	if (empty($opt['href'])) return "<a $param>$txt</a>";
	$link = "[[{$opt['href']}|$txt]]";
	$ws = '';
	foreach( array('target','rel','accesskey') as $p ) if (!empty($opt[$p])) $ws .= "$p=\"{$opt[$p]}\" ";
	if (empty($ws)) return $link;
	$ws = str_replace('target="_blank"', 'newwin', rtrim($ws, ' '));
	return "%$ws%$link";
}

function ConvertHtmlSpan($param) {
	return preg_replace(
		array( '/%/', '/(?:class|style)=([\'"])(.*?)\1/' ),
		array( 'pct', '$2' ),
		$param );
}

function ConvertHtmlList($html) {
	$out = '';
	$lit = array();
	$strip = FALSE;
	$html = preg_replace('#(</?(?:ol|ul|li))\b([^>]+)>#i','$1>',$html);
	$lia = preg_split( '#\s*(</?(?:ol|ul|li)\s*>)\s*#i', $html, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
	foreach( $lia as $n ) switch($n) {
		case '<ul>': case '<UL>':
			$lit[] = "\n".str_repeat( '*', count($lit)+1 ).' ';
			break;
		case '<ol>': case '<OL>':
			$lit[] = "\n".str_repeat( '#', count($lit)+1 ).' ';
			break;
		case '</ul>': case '</UL>':
		case '</ol>': case '</OL>':
			array_pop($lit);
			$strip = FALSE;
			break;
		case '<li>': case '<LI>':
			if($lit) $out .= end($lit);
			$strip = TRUE;
			break;
		case '</li>': case '</LI>':
			$strip = FALSE;
			break;
		default:
			if ($strip) {
				$out .= preg_replace('/\s+/',' ',$n);
				$strip = FALSE;
			} else $out .= $n;
	}
	return $out;
}