<?php if (!defined('PmWiki')) exit();
# vim: set ts=4 sw=4 et:
##
## File: text2tbl.php
## Version: 2010-04-06
## SVN ID: $Id$
## Status: alpha
## Author: Peter Bowers
## Create Date: August 25, 2008
## Copyright: 2008-2010, Peter Bowers
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License, Version 2, as
## published by the Free Software Foundation.
## http://www.gnu.org/copyleft/gpl.html
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## text2tbl.php provides a couple $ROEPatterns to convert text to simple table
## markup.

$RecipeInfo['Text2Tbl']['Version'] = '2013-09-23';

# Marker constant telling other code that this recipe is loaded.  The name
# must be quoted: a bare word is an undefined constant, which is a fatal
# Error on PHP 8 (older versions only fell back to the string with a notice).
define('text2tbl', true);

# Site-overridable defaults (SDV only assigns when the variable is unset).
SDV($t2tKeepOriginal, 1);
SDV($t2tEnableSuppressDefaultAlign, false);

# This pattern lets you define a table like this:
#   TABLESTART border=1
#   abc     def         ghi
#   jkl     asdfasfd    asdf
#   qwer    sss         sssssss
#   TABLEEND
# You don't have to make things line up -- just make sure that there are no
# occurrences of multiple consecutive spaces unless you want them to become
# the dividers between columns.
#
# NOTE(review): this replacement relies on the /e (eval) pattern modifier,
# which PHP removed in 7.0.  Whether it still works depends on how the
# installed PmWiki processes $ROEPatterns -- confirm before relying on it.
$ROEPatterns["/TABLESTART([^\n]*)\n(.*?)\n\s*TABLEEND/se"] =
    "'|| '.PSS('$1').'\n'.PSS(preg_replace(array('/ {2,}/', '/^/m', '/$/m'), array(' ||', '||', ' ||'), PSS('$2')))";

# This pattern is significantly more complicated.
# It allows this type of table definition:
#   (:text2tbl OPTIONS:)
#   col1 SEP col2 SEP col3
#   col1 SEP col2 SEP col3
#   col1 SEP col2 SEP col3
#   (:text2tblend:)
# OPTIONS are specified in the usual opt=val method and can include the
# following:
#   re=1 (treat the separator as a regular expression pattern)
#   sep=X (the separator for all columns will be X (can be multiple chars))
#   sep1=X sep2=Y sep3=Z (specify potentially different separators for each
#       column)
#   align=lrc (specify the alignment of each column as left, right, or center;
#       the number of chars will equal the number of columns: llc, lrllc, etc.
#       A single char applies to every column; a short list is padded with l)
#   fmt=AXAXAX (specify the alignment (A) and the separator (X) of each col
#       in a single option.  Separators must be single-character.)
#   input=spaces|csv|tsv|simple (preset separators, or read an existing
#       simple ||-table)
#   output=advanced (emit (:table:)/(:cell:) markup instead of a simple table)
#   options="border=1" (specify table options - whatever is in the quotes will
#       be put verbatim in the usual place on the 1st line)
#   attr="..." attr1="..." attr2="..." (extra attributes for every cell / for
#       the cells of column N; only used with output=advanced)
#   quotes=1 (specify that you want double-quoted strings to be hidden from
#       possibly containing separators -- good for CSV and etc.)
#   keepquotes=1 (specify that the quotes should stay in the text rather than
#       being stripped out as would normally happen if quotes=1)
#   keeporiginal=0|1 (whether the source text is kept above the generated
#       table; defaults to $t2tKeepOriginal)
# Any other opt=val pair is passed through as a table option.
# Note that specifying sep1,sep2,sep3 is mutually exclusive with specifying
# sep and is mutually exclusive with specifying fmt.  You can specify your
# separator in exactly one way -- if you choose more than one way then something
# will be ignored...
#
# NOTE(review): this replacement relies on the /e (eval) pattern modifier,
# which PHP removed in 7.0.  Whether it still works depends on how the
# installed PmWiki processes $ROEPatterns -- confirm before relying on it.
$ROEPatterns["/(\\(:text2ta?ble?([^:\n]*):\\))\n(.*?\n)(\s*\\(:text2ta?ble?end:\\))/se"] =
    "t2tText2Tbl(PSS('$2'), PSS('$3'), PSS('$1'), PSS('$4'))";

# preg_replace_callback helper: hide the contents of one double-quoted string
# behind a Keep() token in pool 'P' so separators inside it are not seen.
function t2tKeepQuoted($m)
{
    return Keep($m[1], 'P');
}

# preg_replace_callback helper: swap a pool-'P' keep token back for its text.
function t2tRestoreQuoted($m)
{
    global $KPV;
    return isset($KPV[$m[1]]) ? $KPV[$m[1]] : '';
}

# Same as t2tRestoreQuoted() but puts the double quotes back (keepquotes=1).
function t2tRestoreQuotedKeepQuotes($m)
{
    return '"' . t2tRestoreQuoted($m) . '"';
}

# Restore every hidden quoted string in $text when quotes handling is on;
# returns $text untouched otherwise.
function t2tRestoreQuotes($text, $opt)
{
    global $KeepToken;
    if (empty($opt['quotes'])) return $text;
    $callback = empty($opt['keepquotes']) ? 't2tRestoreQuoted' : 't2tRestoreQuotedKeepQuotes';
    return preg_replace_callback("/$KeepToken(\\d+P)$KeepToken/", $callback, $text);
}

# Convert a block of delimited text (or an existing simple table) into PmWiki
# table markup.
#
#   $args           - option string from the opening directive (parsed with
#                     ParseArgs; see the option list above)
#   $tabledef       - the text between the opening and closing directives
#   $text2tbl_begin - the opening directive exactly as written
#   $text2tbl_end   - the closing directive exactly as written
#
# Returns the generated table markup.  When keeporiginal is in effect the
# result is the opening directive, the original text, an
# (:end-of-original-data ...:) marker, the generated table and the closing
# directive, so that the conversion can be repeated on the next save.
function t2tText2Tbl($args, $tabledef, $text2tbl_begin, $text2tbl_end)
{
    global $t2tKeepOriginal, $t2tEnableSuppressDefaultAlign;

    $opt = ParseArgs($args);
    SDV($opt['keeporiginal'], $t2tKeepOriginal);

    # Give every optional setting a default so later reads never touch an
    # undefined index.
    foreach (array('input', 'output', 'options', 'align', 'quotes', 'keepquotes', 'attr') as $name) {
        if (!isset($opt[$name])) $opt[$name] = '';
    }

    # is it a regex?
    $re = isset($opt['re']) ? $opt['re'] : null;

    # $sep will either be a string (same for all) or an array of strings
    # (each column can be different)
    $sep = null;
    $align = '';
    if (isset($opt['sep1'])) {
        $sep = array();
        for ($i = 1; isset($opt['sep' . $i]); $i++) $sep[] = $opt['sep' . $i];
    } elseif (isset($opt['sep'])) {
        $sep = $opt['sep'];
    } elseif (isset($opt['fmt'])) {
        // align-sep-align-sep-align
        $fmt = (string)$opt['fmt'];
        $align = (string)substr($fmt, 0, 1);
        $sep = array();
        for ($i = 1; $i < strlen($fmt); $i += 2) {
            $sep[] = substr($fmt, $i, 1);
            $align .= substr($fmt, $i + 1, 1);
        }
    } elseif ($opt['input'] == 'spaces' || $opt['input'] == 'space') {
        $sep = ' {2,}';
        $re = true;
    } elseif ($opt['input'] == 'csv') {
        $sep = ',';
        $re = false;
        $opt['quotes'] = true;
    } elseif ($opt['input'] == 'tsv') {
        $sep = '\t';
        $re = true;
    } else {
        $sep = ($re ? ' +' : ','); // default to multi-spaces if re, comma non-re
    }

    # On a re-save the definition still carries the previously generated
    # table after the marker; only the part before it is the real input.
    if (preg_match('/^(.*?)\(:end\-of\-original\-data[^:]*:\)/s', $tabledef, $m))
        $tabledef = $m[1];
    # Split on newlines that are not escaped with a backslash.
    $lines = preg_split("/(?<!\\\\)\n/", $tabledef);

    # Determine how many columns (in the 1st line if not specified by $sep).
    # $colcnt is the number of separators, i.e. one less than the columns.
    if (is_array($sep)) {
        $colcnt = count($sep);
    } else {
        $first = isset($lines[0]) ? $lines[0] : '';
        if ($opt['quotes'])
            $first = preg_replace_callback('/"([^"]*)"/', 't2tKeepQuoted', $first);
        if ($re) $colcnt = (int)preg_match_all("/$sep/", $first, $junk);
        else $colcnt = ((string)$sep === '') ? 0 : substr_count($first, (string)$sep);
    }

    # Make sure we have a valid $align ($colcnt+1 occurrences of l or r or c)
    if (!$align) {
        $align = '';
        $want = $colcnt + 1;
        $given = (string)$opt['align'];
        if ($given) {
            $len = strlen($given);
            if ($len == $want) $align = $given;
            elseif ($len == 1) $align = str_repeat($given, $want);
            elseif ($len < $want) $align = $given . str_repeat('l', $want - $len);
            else $align = substr($given, 0, $want); // too long: keep the leading columns
        }
        if (!$align) $align = str_repeat('l', $want);
    }
    $align = strtolower($align);

    # If it's not a regex then preg_quote everything ('/' is our delimiter)
    if (!$re) {
        if (is_array($sep)) {
            foreach ($sep as $idx => $one) $sep[$idx] = preg_quote($one, '/');
        } else {
            $sep = preg_quote((string)$sep, '/');
        }
    }

    # Everything that is not one of our own options is handed to the table.
    $ValidOpts = array('#'=>1, 'sep'=>1, 'align'=>1, 'fmt'=>1, 're'=>1,
        'options'=>1, 'quotes'=>1, 'keepquotes'=>1, 'input'=>1, 'output'=>1,
        'keeporiginal'=>1, 'attr'=>1);
    for ($i = 1; $i < 25; $i++) {
        $ValidOpts['sep' . $i] = 1;
        $ValidOpts['attr' . $i] = 1;
    }
    foreach (array_diff_key($opt, $ValidOpts) as $k => $v) {
        if (!is_scalar($v)) continue; // array-valued entries are not opt=val pairs
        $opt['options'] .= ($opt['options'] ? " " : "") . "$k=$v";
    }

    $cells = array();           // [row][col] => cell text
    $options = array();         // [row][col] => per-cell (:t2t ...:) options
    $colspan = array();         // [row][col] => number of columns spanned
    $rowspan = array();         // [row][col] => number of rows spanned
    $align_override = array();  // [row][col] => l/r/c differing from $align

    if ($opt['input'] == 'simple') {
        ##
        ## Read from simple table
        ##
        # A leading "|| attrs" line carries the table attributes.
        if (preg_match("/^\s*\|\|\s*([^\|]*)$/", $lines[0], $m)) {
            array_shift($lines);
            $opt['options'] = ($opt['options'] ? $opt['options'] . ' ' : '') . $m[1];
        }
        foreach ($lines as $k => $line) {
            # Strip the outer || ... || and split into cells.
            $line = preg_replace("/^\s*\|\|(.*?)(?:\|\|)?\s*$/s", "$1", $line);
            $row = explode("||", $line);
            $width = count($row);
            # Now check alignment and store override in an array...
            $lastcol = 0;
            for ($i = 0; $i < $width; $i++) {
                if ($x = t2tOptions($row[$i])) $options[$k][$i] = $x;
                # An empty cell (or one made of underscores) widens the
                # last real cell to its left.
                if ($row[$i] === '' || preg_match("/^_{2,}$/", $row[$i])) {
                    if (!empty($colspan[$k][$lastcol])) $colspan[$k][$lastcol]++;
                    else $colspan[$k][$lastcol] = 2;
                    $row[$i] = '';
                    continue;
                }
                # Trailing ++... gives the rowspan (one per plus sign).
                if (preg_match("/(\+{2,})(\s*)$/", $row[$i], $m)) {
                    # Get rid of the +++
                    $row[$i] = preg_replace("/\+{2,}(\s*)$/", '$1', $row[$i]);
                    $rowspan[$k][$i] = strlen($m[1]);
                }
                # ^^ marks a cell covered by a rowspan from above.
                if (preg_match("/^\^{2,}$/", $row[$i])) {
                    $row[$i] = '';
                    continue;
                }
                # Simple-table alignment is expressed by surrounding spaces.
                $padLeft = (substr($row[$i], 0, 1) == ' ');
                $padRight = (substr($row[$i], -1, 1) == ' ');
                if ($padRight) $a = $padLeft ? 'c' : 'l';
                else $a = $padLeft ? 'r' : 'l';
                if ($a != substr($align, $i, 1)) $align_override[$k][$i] = $a;
                $lastcol = $i;
            }
            $cells[$k] = $row;
            if ($width > $colcnt) $colcnt = $width;
        }
        $colcnt--; // back to "index of the last column"
    } else {
        ##
        ## Read from (:text2tbl ...:) "markup"
        ##
        foreach ($lines as $k => $line) {
            if ($line === '') continue;
            if ($opt['quotes'])
                $line = preg_replace_callback('/"([^"]*)"/', 't2tKeepQuoted', $line);
            for ($col = 0; $col <= $colcnt; $col++) {
                if ($col == $colcnt) {
                    # Whatever is left belongs to the last column.
                    if ($x = t2tOptions($line)) $options[$k][$col] = $x;
                    $cells[$k][$col] = $line;
                    break;
                }
                $s = is_array($sep) ? $sep[$col] : $sep;
                if (preg_match("/^((.*?)" . $s . ")/", $line, $m)) {
                    if ($x = t2tOptions($m[2])) $options[$k][$col] = $x;
                    $line = (string)substr($line, strlen($m[1]));
                    $cells[$k][$col] = $m[2];
                }
                # Note: we can't complain about a missing separator because
                # a rowspan could result in a column "missing" which is
                # totally valid
            }
        }
    }

    $lines = array();
    if ($opt['output'] == 'advanced') {
        $aligntbl = array('l'=>' align=left', 'r'=>' align=right', 'c'=>' align=center');
        if ($t2tEnableSuppressDefaultAlign) $aligntbl['l'] = '';
        $lines[] = "(:table {$opt['options']}:)";
        $newtbl = '';
        # Iterate the rows that actually exist: skipped blank lines leave
        # gaps in the keys, so counting from 0 would lose rows.
        foreach ($cells as $j => $row) {
            $rowStarted = false;
            for ($col = 0; $col <= $colcnt; $col++) {
                if (!isset($row[$col]) || (string)$row[$col] === '') continue;
                $text = $row[$col];
                $a = isset($align_override[$j][$col])
                    ? $align_override[$j][$col]
                    : (string)substr($align, $col, 1);
                # The first cell emitted for a source row must open a new
                # table row, even when column 0 itself was empty.
                $line = "(:cell" . ($rowStarted ? '' : 'nr')
                    . (isset($aligntbl[$a]) ? $aligntbl[$a] : '');
                $rowStarted = true;
                if (substr($text, 0, 1) == '!') {
                    $text = (string)substr($text, 1);
                    $line .= " style='text-align:center; font-weight:bold;'";
                }
                if (!empty($options[$j][$col])) $line .= ' ' . $options[$j][$col];
                if (!empty($opt['attr' . ($col + 1)])) $line .= ' ' . $opt['attr' . ($col + 1)];
                if (!empty($opt['attr'])) $line .= ' ' . $opt['attr'];
                if (!empty($colspan[$j][$col])) $line .= " colspan=" . $colspan[$j][$col];
                if (!empty($rowspan[$j][$col])) $line .= " rowspan=" . $rowspan[$j][$col];
                $line .= ":)";
                $lines[] = $line . t2tRestoreQuotes(trim($text), $opt);
            }
        }
        $lines[] = "(:tableend:)";
    } else {
        $newtbl = "|| {$opt['options']}\n";
        foreach ($cells as $row) {
            $line = '';
            # One step past the last column writes the closing ||.
            for ($col = 0; $col <= $colcnt + 1; $col++) {
                # $left pads the end of the previous cell, $right the start
                # of this one; simple tables align by those spaces.
                $left = ($col == 0 || substr($align, $col - 1, 1) == 'r') ? '' : ' ';
                $right = ($col < $colcnt + 1 && substr($align, $col, 1) == 'l') ? '' : ' ';
                if ($col == $colcnt + 1) {
                    $line .= $left . '||';
                    break;
                }
                $text = isset($row[$col]) ? (string)$row[$col] : '';
                $trimmed = ltrim($text);
                if (substr($trimmed, 0, 1) == '!')
                    # header cell: the ! must directly follow the ||
                    $line .= $left . '||!' . $right . substr($trimmed, 1);
                else
                    $line .= $left . '||' . $right . $text;
            }
            $lines[] = t2tRestoreQuotes($line, $opt);
        }
    }

    if ($opt['keeporiginal']) {
        $newtbl = $text2tbl_begin . "\n" . $tabledef
            . "(:end-of-original-data (THE TABLE BELOW WILL BE OVER-WRITTEN EVERY TIME THIS PAGE IS SAVED!) Specify keeporiginal=0 if you want to get rid of the text above.:)\n"
            . $newtbl . implode("\n", $lines) . "\n" . $text2tbl_end;
    } else {
        $newtbl .= implode("\n", $lines);
    }
    return $newtbl;
}

# Extract a leading (:t2t ...:) directive from a cell.
#   $string - cell text, passed by reference; when a directive is found it
#             is removed so only the text following it remains
# Returns the directive's content, or '' when the cell has none.
function t2tOptions(&$string)
{
    if (preg_match("/^\s*\(:t2t\s([^:]*):\)(.*)$/", $string, $m)) {
        $string = $m[2]; // as a side effect, strip this (:t2t ...:) markup
        return $m[1];
    }
    return '';
}

# If keeporiginal is in effect then the original data is maintained in the
# source, but it must not be displayed.  This markup makes the original data
# (and the text2tbl markup) invisible.
# Display rule: of a kept-original block show only the generated table, i.e.
# whatever sits between the end-of-original-data marker and the closing
# directive.
# NOTE(review): the replacement is the return value of PSS('$1'), evaluated
# once at registration time -- presumably just the back-reference '$1'.
Markup(
    't2t-comment',
    '<{$var}',
    "/\(:text2ta?ble?[^\n]*:\).*?\(:end-of-original-data[^:]*:\)\n(.*?)\n\(:text2ta?ble?end:\)/s",
    PSS('$1')
);

# This will implement a "real" markup which will convert any of the usual
# types of structured text (as implemented in t2tText2Tbl()) but will not
# alter the source
# NOTE(review): uses the /e pattern modifier, which PHP removed in 7.0;
# whether PmWiki's Markup() still accepts it depends on the PmWiki version.
Markup(
    'flextable',
    '<t2t-comment',
    "/(\\(:flexta?ble?([^:\n]*):\\))\n(.*?\n)(\s*\\(:flexta?ble?end:\\))/se",
    "FlexTbl(PSS('$2'), PSS('$3'), PSS('$1'), PSS('$4'))"
);

# Render a (:flextable ...:) block at display time.
# Takes the same four arguments as t2tText2Tbl() and returns its result;
# unless the option string already mentions keeporiginal, keeporiginal=0 is
# appended so only the generated table is returned.
function FlexTbl($args, $tabledef, $text2tbl_begin, $text2tbl_end)
{
    $mentionsKeep = (strpos($args, 'keeporiginal') !== false);
    if (!$mentionsKeep) {
        $args = $args . " keeporiginal=0";
    }
    $table = t2tText2Tbl($args, $tabledef, $text2tbl_begin, $text2tbl_end);
    return $table;
}