<?php if (!defined('PmWiki')) exit();
# vim: set ts=4 sw=4 et:
##
##        File: text2tbl.php
##     Version: 2010-04-06
##      SVN ID: $Id$
##      Status: alpha
##      Author: Peter Bowers
## Create Date: August 25, 2008
##   Copyright: 2008-2010, Peter Bowers
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License, Version 2, as
## published by the Free Software Foundation.
## http://www.gnu.org/copyleft/gpl.html
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## text2tbl.php provides a couple $ROEPatterns to convert text to simple table
## markup.  

# Record this recipe's version in the $RecipeInfo array.
$RecipeInfo['Text2Tbl']['Version'] = '2013-09-23';
# Define a marker constant for this recipe (presumably so other code can detect
# that it is loaded).  The name must be a quoted string: a bare word is read as
# an undefined constant, which is a fatal Error as of PHP 8.
define('text2tbl', true);

# Site-wide defaults.
# NOTE(review): SDV() is a PmWiki helper that is expected to assign only when
# the variable is not already set, so a local config can override these.
#   $t2tKeepOriginal               - default for the keeporiginal= option
#   $t2tEnableSuppressDefaultAlign - when true, left-aligned cells in
#                                    output=advanced get no align= attribute
SDV($t2tKeepOriginal, 1);
SDV($t2tEnableSuppressDefaultAlign, false);

# This pattern lets you define a table like this:
#    TABLESTART border=1
#    abc    def       ghi
#    jkl    asdfasfd  asdf
#    qwer   sss       sssssss
#    TABLEEND
# You don't have to make things line up -- just make sure that there are no
# occurrences of multiple consecutive spaces unless you want them to become
# the dividers between columns.
#
# The replacement emits "|| " followed by whatever came after TABLESTART on
# the first line (the table attributes), then rewrites every row in between:
# each run of two or more spaces becomes " ||", every line start gets "||" and
# every line end gets " ||" -- i.e. PmWiki simple-table markup.
#
# NOTE(review): the pattern carries the /e (evaluate replacement) modifier,
# which PHP's own preg_replace() no longer supports as of PHP 7.  This entry
# therefore relies on whatever processes $ROEPatterns to handle /e itself --
# confirm against the PmWiki version in use.
$ROEPatterns["/TABLESTART([^\n]*)\n(.*?)\n\s*TABLEEND/se"] = "'|| '.PSS('$1').'\n'.PSS(preg_replace(array('/ {2,}/', '/^/m', '/$/m'), array(' ||', '||', ' ||'), PSS('$2')))";

# This pattern is significantly more complicated.  It allows this type of
# table definition:
#    (:text2tbl OPTIONS:)
#    col1 SEP col2 SEP col3
#    col1 SEP col2 SEP col3
#    col1 SEP col2 SEP col3
#    (:text2tblend:)
# (The pattern also accepts the spellings text2tabl, text2tble and text2table
# for both the opening and the closing directive.)
# OPTIONS are specified in the usual opt=val method and can include the
# following:
#    re=1 (treat the separator as a regular expression pattern)
#    sep=X (the separator for all columns will be X; can be multiple chars)
#    sep1=X sep2=Y sep3=Z (specify potentially different separators for each
#          column)
#    align=lrc (specify the alignment of each column as left, right, or center;
#          the number of chars will equal the number of columns: llc, lrllc,
#          etc.  A single char is applied to every column.)
#    fmt=AXAXAX (specify the alignment (A) and the separator (X) of each col
#          in a single option.  Separators must be single-character.)
#    input=spaces|space|csv|tsv (separator presets, used only when none of
#          sep1/sep/fmt is given: two-or-more spaces, comma with quotes=1,
#          or tab)
#    input=simple (the rows are already simple-table "||" markup)
#    output=advanced (emit (:table:)/(:cell:) markup instead of "||" rows)
#    attr="..." attr1="..." attr2="..." (with output=advanced: extra text
#          added to every (:cell:) / to the cells of column N)
#    options="border=1" (specify table options - whatever is in the quotes will
#          be put verbatim in the usual place on the 1st line)
#    quotes=1 (specify that you want double-quoted strings to be hidden from
#          possibly containing separators -- good for CSV and etc.)
#    keepquotes=1 (specify that the quotes should stay in the text rather than
#          being stripped out as would normally happen if quotes=1)
#    keeporiginal=0|1 (whether the original text is kept in the page source
#          above the generated table; defaults to $t2tKeepOriginal)
# Any other opt=val pair is appended to the table options.
# Note that specifying sep1,sep2,sep3 is mutually exclusive with specifying
# sep and is mutually exclusive with specifying fmt.  You can specify your
# separator in exactly one way -- if you choose more than one way then something
# will be ignored (sep1.. wins over sep, which wins over fmt, which wins over
# input=).
#
# Capture groups handed to t2tText2Tbl(): $2 = the option text, $3 = the rows
# (including the final newline), $1 = the whole opening directive, $4 = the
# closing directive with any leading whitespace.
# NOTE(review): this pattern uses the /e modifier, which PHP's preg_replace()
# no longer supports as of PHP 7; it relies on whatever processes $ROEPatterns
# to handle /e -- confirm against the PmWiki version in use.
$ROEPatterns["/(\\(:text2ta?ble?([^:\n]*):\\))\n(.*?\n)(\s*\\(:text2ta?ble?end:\\))/se"] = "t2tText2Tbl(PSS('$2'), PSS('$3'), PSS('$1'), PSS('$4'))";
##
## t2tText2Tbl() - convert a block of delimited text (or an existing simple
## table when input=simple) into PmWiki table markup.
##
##   $args           - option text of the opening directive (opt=val pairs,
##                     parsed with ParseArgs())
##   $tabledef       - the rows, one per line; a newline preceded by a
##                     backslash does not end a row
##   $text2tbl_begin - the complete opening directive as it appears in the page
##   $text2tbl_end   - the complete closing directive as it appears in the page
##
## Returns the generated table markup.  When keeporiginal is on, the result is
## the opening directive, the original rows, an (:end-of-original-data ...:)
## marker, the generated table and the closing directive, so that the
## conversion can be repeated on the next save.
##
function t2tText2Tbl($args, $tabledef, $text2tbl_begin, $text2tbl_end)
{
    global $KeepToken, $t2tKeepOriginal, $t2tEnableSuppressDefaultAlign;

    $opt = ParseArgs($args);

    SDV($opt['keeporiginal'], $t2tKeepOriginal);

    # Read every optional setting once, with a default, so that absent options
    # do not raise undefined-index warnings further down.
    $input      = isset($opt['input'])   ? $opt['input']   : '';
    $output     = isset($opt['output'])  ? $opt['output']  : '';
    $tblopts    = isset($opt['options']) ? $opt['options'] : '';
    $optalign   = isset($opt['align'])   ? $opt['align']   : '';
    $quotes     = !empty($opt['quotes']);
    $keepquotes = !empty($opt['keepquotes']);
    $re         = !empty($opt['re']);       # is the separator a regex?
    $align      = '';

    # $sep will either be a string (same for all) or an array of strings
    # (each column can be different).  Precedence: sep1.., sep, fmt, input=.
    if (isset($opt['sep1'])) {
        $sep = array();
        for ($i = 1; isset($opt["sep$i"]); $i++)
            $sep[] = $opt["sep$i"];
    } elseif (isset($opt['sep'])) {
        $sep = $opt['sep'];
    } elseif (isset($opt['fmt'])) { // align-sep-align-sep-align
        $fmt = $opt['fmt'];
        $sep = array();
        $align = (string)substr($fmt, 0, 1);
        for ($i = 1; $i < strlen($fmt); $i += 2) {
            $sep[] = substr($fmt, $i, 1);
            $align .= substr($fmt, $i+1, 1);
        }
    } elseif ($input === 'spaces' || $input === 'space') {
        $sep = ' {2,}';
        $re = true;
    } elseif ($input === 'csv') {
        $sep = ',';
        $re = false;
        $quotes = true;
    } elseif ($input === 'tsv') {
        $sep = '\t';
        $re = true;
    } else {
        $sep = ($re ? '  +' : ','); // default to multi-spaces if re, comma non-re
    }

    # On a re-save the definition still contains the previously generated
    # table; only the text in front of the end-of-original-data marker is input.
    if (preg_match('/^(.*?)\(:end\-of\-original\-data[^:]*:\)/s', $tabledef, $m))
        $tabledef = $m[1];
    $lines = preg_split("/(?<!\\\\)\n/", $tabledef);

    # Determine how many separators there are per row: the number of given
    # separators, or else the number found in the 1st line.  A row therefore
    # has $colcnt+1 cells.
    if (is_array($sep)) {
        $colcnt = sizeof($sep);
    } else {
        if ($quotes)
            $x = preg_replace_callback('/"([^"]*)"/', 't2tKeepQuoted', $lines[0]);
        else
            $x = $lines[0];
        if ($re)
            $colcnt = (int)preg_match_all("/$sep/", $x, $junk);
        else   # substr_count() rejects an empty needle
            $colcnt = ($sep === '') ? 0 : substr_count($x, "$sep");
    }

    # Make sure we have a usable $align ($colcnt+1 occurrences of l or r or c)
    # unless fmt= already supplied one.
    if (!$align) {
        $align = '';
        $want = $colcnt + 1;
        if ($optalign) {
            if (strlen($optalign) == $want)
                $align = $optalign;
            elseif (strlen($optalign) == 1)
                $align = str_repeat($optalign, $want);
            elseif (strlen($optalign) < $want)
                $align = $optalign . str_repeat('l', $want - strlen($optalign));
            else   # too long: keep the leading part of the *given* alignment
                $align = substr($optalign, 0, $want);
        }
        if (!$align) $align = str_repeat('l', $want);
    }
    $align = strtolower($align);

    # If it's not a regex then preg_quote everything.  The delimiter is passed
    # too, because the separators are used inside /.../ patterns below.
    if (!$re) {
        if (is_array($sep)) {
            foreach ($sep as $i => $one)
                $sep[$i] = preg_quote($one, '/');
        } else {
            $sep = preg_quote($sep, '/');
        }
    }

    # Every option that is not one of ours is passed through as a table
    # attribute.
    $ValidOpts = array('#'=>1, 'sep'=>1, 'align'=>1, 'fmt'=>1, 're'=>1,
        'options'=>1, 'quotes'=>1, 'keepquotes'=>1, 'input'=>1, 'output'=>1,
        'keeporiginal'=>1, 'attr'=>1);
    for ($i = 1; $i < 25; $i++) {
        $ValidOpts['sep'.$i] = 1;
        $ValidOpts['attr'.$i] = 1;
    }
    foreach (array_diff_key($opt, $ValidOpts) as $k => $v) {
        if (!is_scalar($v)) continue;   # cannot be rendered as key=value
        $tblopts .= ($tblopts ? " " : "") . "$k=$v";
    }

    # Per-cell data collected while reading, indexed [row][column].
    $cells = array();
    $options = array();          # text of a leading (:t2t ...:) in the cell
    $colspan = array();
    $rowspan = array();
    $align_override = array();   # alignment differing from the column default

    if ($input === 'simple') {
        ##
        ## Read from simple table
        ##
        # A first line consisting of "||" plus attributes is the table header
        # line; its attributes are added to the table options.
        if (preg_match("/^\s*\|\|\s*([^\|]*)$/", $lines[0], $m)) {
            array_shift($lines);
            $tblopts = ($tblopts ? $tblopts.' ' : '') . $m[1];
        }
        foreach ($lines as $k => $line) {
            # Strip the outer || ... || and split into cells
            $line = preg_replace("/^\s*\|\|(.*?)(?:\|\|)?\s*$/s", "$1", $line);
            $cells[$k] = explode("||", $line);
            $lastcol = 0;
            for ($i = 0; $i < sizeof($cells[$k]); $i++) {
                if ($x = t2tOptions($cells[$k][$i]))
                    $options[$k][$i] = $x;
                # An empty cell (or one holding only underscores) widens the
                # last non-empty cell to its left.
                if ($cells[$k][$i] === '' || preg_match("/^_{2,}$/", $cells[$k][$i])) {
                    if (!empty($colspan[$k][$lastcol]))
                        $colspan[$k][$lastcol]++;
                    else
                        $colspan[$k][$lastcol] = 2;
                    $cells[$k][$i] = '';
                    continue;
                }
                # Trailing ++ (one plus sign per row) requests a rowspan
                if (preg_match("/(\+{2,})(\s*)$/", $cells[$k][$i], $m)) {
                    # Get rid of the +++
                    $cells[$k][$i] = preg_replace("/\+{2,}(\s*)$/", '$1', $cells[$k][$i]);
                    $rowspan[$k][$i] = strlen($m[1]);
                }
                # A cell holding only carets is a placeholder; blank it
                if (preg_match("/^\^{2,}$/", $cells[$k][$i])) {
                    $cells[$k][$i] = '';
                    continue;
                }
                # Derive the alignment from the padding: a space on both sides
                # is center, only a leading space is right, otherwise left.
                if (substr($cells[$k][$i], -1, 1) == ' ') {
                    if (substr($cells[$k][$i], 0, 1) == ' ')
                        $a = 'c';
                    else
                        $a = 'l';
                } elseif (substr($cells[$k][$i], 0, 1) == ' ')
                    $a = 'r';
                else
                    $a = 'l';
                if ($a != substr($align, $i, 1))
                    $align_override[$k][$i] = $a;
                $lastcol = $i;
            }
            if ($i > $colcnt) $colcnt = $i;
        }
        $colcnt--; // $colcnt counts separators, i.e. one less than the cells
    } else {
        ##
        ## Read from (:text2tbl ...:) "markup"
        ##
        foreach ($lines as $k => $line) {
            if ($line === '') continue;
            # Hide quoted strings so separators inside them are not seen
            if ($quotes)
                $line = preg_replace_callback('/"([^"]*)"/', 't2tKeepQuoted', $line);
            for ($col = 0; $col <= $colcnt; $col++) {
                if ($col == $colcnt) {
                    # Last cell: whatever is left of the line
                    if ($x = t2tOptions($line))
                        $options[$k][$col] = $x;
                    $cells[$k][$col] = $line;
                    continue;
                }
                $s = is_array($sep) ? $sep[$col] : $sep;
                if (preg_match("/^((.*?)".$s.")/", $line, $m)) {
                    if ($x = t2tOptions($m[2]))
                        $options[$k][$col] = $x;
                    $line = substr($line, strlen($m[1]));
                    $cells[$k][$col] = $m[2];
                }
                # Note: we can't give a message when nothing matched because a
                # rowspan could result in a column "missing" which is totally
                # valid
            }
        }
    }

    # Pattern and callback that put the hidden quoted strings back.
    $rpat = "/$KeepToken(\\d+P)$KeepToken/";
    $restore = $keepquotes ? 't2tRestoreKeptQuoted' : 't2tRestoreKept';

    # Rows are visited by their real keys: skipped blank lines leave gaps in
    # the numbering, so counting from 0 would drop the last rows.
    $rows = array_keys($cells);
    $lines = array();
    if ($output === 'advanced') {
        $aligntbl = array('l'=>' align=left', 'r'=>' align=right', 'c'=>' align=center');
        if ($t2tEnableSuppressDefaultAlign) $aligntbl['l'] = '';
        $lines[] = "(:table $tblopts:)";
        $newtbl = '';
        foreach ($rows as $j) {
            for ($col = 0; $col <= $colcnt; $col++) {
                # Empty cells produce no (:cell:) at all
                if (!isset($cells[$j][$col]) || $cells[$j][$col] === '') continue;
                if (isset($align_override[$j][$col]))
                    $a = $align_override[$j][$col];
                else
                    $a = strtolower(substr($align, $col, 1));
                # The first column starts a new row
                $line = "(:cell" . ($col == 0 ? 'nr' : '')
                      . (isset($aligntbl[$a]) ? $aligntbl[$a] : '');
                # A leading ! marks a header cell
                if (substr($cells[$j][$col], 0, 1) == '!') {
                    $cells[$j][$col] = substr($cells[$j][$col], 1);
                    $line .= " style='text-align:center; font-weight:bold;'";
                }
                if (!empty($options[$j][$col]))
                    $line .= ' '.$options[$j][$col];
                if (!empty($opt['attr'.($col+1)]))
                    $line .= ' '.$opt['attr'.($col+1)];
                if (!empty($opt['attr']))
                    $line .= ' '.$opt['attr'];
                if (!empty($colspan[$j][$col]))
                    $line .= " colspan=".$colspan[$j][$col];
                if (!empty($rowspan[$j][$col]))
                    $line .= " rowspan=".$rowspan[$j][$col];
                $line .= ":)";
                $cell = trim($cells[$j][$col]);
                if ($quotes)
                    $cell = preg_replace_callback($rpat, $restore, $cell);
                $lines[] = $line.$cell;
            }
        }
        $lines[] = "(:tableend:)";
    } else {
        $newtbl = "|| $tblopts\n";
        foreach ($rows as $j) {
            $line = '';
            # One pass more than there are cells: the last pass only closes
            # the row with its final "||".
            for ($col = 0; $col <= $colcnt+1; $col++) {
                $cell = isset($cells[$j][$col]) ? $cells[$j][$col] : '';
                # $left pads the end of the previous cell, $right the start of
                # this one; in simple tables the padding sets the alignment.
                if ($col == 0 || strtolower(substr($align, $col-1, 1)) == 'r')
                    $left = '';
                else
                    $left = ' ';
                if ($col < $colcnt+1 && strtolower(substr($align, $col, 1)) == 'l')
                    $right = '';
                else
                    $right = ' ';
                # NOTE(review): the header test looks past leading blanks but
                # substr(..., 1) removes the first character of the untrimmed
                # cell, so " !x" keeps its "!" -- left as it was.
                $header = (substr(ltrim($cell), 0, 1) == '!');
                if ($col == 0) {
                    if ($header)
                        $line = "||!".$right.substr($cell, 1);
                    else
                        $line = "||".$right.$cell;
                } elseif ($col == $colcnt+1) {
                    $line = $line.$left."||";
                } else {
                    if ($header)
                        $line = $line.$left."||!".$right.substr($cell, 1);
                    else
                        $line = $line.$left."||".$right.$cell;
                }
            }
            if ($quotes)
                $line = preg_replace_callback($rpat, $restore, $line);
            $lines[] = $line;
        }
    }
    if (!empty($opt['keeporiginal'])) {
        $newtbl = $text2tbl_begin ."\n". $tabledef . "(:end-of-original-data (THE TABLE BELOW WILL BE OVER-WRITTEN EVERY TIME THIS PAGE IS SAVED!) Specify keeporiginal=0 if you want to get rid of the text above.:)\n". $newtbl . implode("\n",$lines) ."\n". $text2tbl_end;
    } else
        $newtbl .= implode("\n",$lines);
    return($newtbl);
}

# preg_replace_callback() helper for t2tText2Tbl(): hide the inside of a
# double-quoted string ($m[1]) via Keep() in pool 'P'.
# NOTE(review): Keep() is a PmWiki helper; it is expected to store the text in
# $KPV and return a placeholder of the form $KeepToken<n>P$KeepToken, which is
# what the restore pattern in t2tText2Tbl() looks for -- confirm.
function t2tKeepQuoted($m)
{
    return Keep($m[1], 'P');
}

# preg_replace_callback() helper for t2tText2Tbl(): replace a placeholder with
# the text stored under its key ($m[1]) in $KPV.  A placeholder without a
# stored value is left untouched.
function t2tRestoreKept($m)
{
    global $KPV;
    return isset($KPV[$m[1]]) ? $KPV[$m[1]] : $m[0];
}

# Same as t2tRestoreKept(), but puts the double quotes back around the text
# (used for keepquotes=1).
function t2tRestoreKeptQuoted($m)
{
    global $KPV;
    return isset($KPV[$m[1]]) ? '"'.$KPV[$m[1]].'"' : $m[0];
}

# Detach a leading (:t2t ...:) directive from a cell.
#   $string - the cell text, passed by reference; when the directive is found
#             it is replaced by the text that follows the directive
# Returns the text between "(:t2t " and ":)", or '' when the cell does not
# start (after optional whitespace) with such a directive.
function t2tOptions(&$string)
{
    $found = preg_match("/^\s*\(:t2t\s([^:]*):\)(.*)$/", $string, $parts);
    if (!$found)
        return('');
    # side effect: the caller's cell loses the directive
    $string = $parts[2];
    return($parts[1]);
}

# If keeporiginal=1 was specified then the original data is maintained in the
# source, but it must not be displayed.  This markup makes the original data
# (and the text2tbl markup) invisible: everything from the opening directive
# through the (:end-of-original-data ...:) marker, and the closing directive,
# is dropped, leaving only the generated table that sits in between.
# NOTE(review): PSS('$1') is called right here, while the rule is being
# registered (it is not a quoted expression and the pattern has no /e flag),
# so the replacement is whatever PSS() returns for the literal '$1' --
# presumably just '$1'; confirm.
Markup('t2t-comment', '<{$var}', 
    "/\(:text2ta?ble?[^\n]*:\).*?\(:end-of-original-data[^:]*:\)\n(.*?)\n\(:text2ta?ble?end:\)/s",
    PSS('$1'));
# This will implement a "real" markup which will convert any of the usual
# types of structured text (as implemented in t2tText2Tbl() above) but will not
# alter the source.  It is written (:flextbl OPTIONS:) ... (:flextblend:)
# (flextabl, flextble and flextable are accepted as well) and is handled by
# FlexTbl().
# NOTE(review): the pattern uses the /e modifier, which PHP's preg_replace()
# no longer supports as of PHP 7; this relies on Markup() handling /e itself --
# confirm against the PmWiki version in use.
Markup('flextable', '<t2t-comment',
    "/(\\(:flexta?ble?([^:\n]*):\\))\n(.*?\n)(\s*\\(:flexta?ble?end:\\))/se",
    "FlexTbl(PSS('$2'), PSS('$3'), PSS('$1'), PSS('$4'))");

# Handler for the (:flextable ...:) markup: run the same conversion as
# t2tText2Tbl(), but turn keeporiginal off unless the option text already
# mentions keeporiginal, so that only the generated table is returned.
# The parameters are passed straight through to t2tText2Tbl().
function FlexTbl($args, $tabledef, $text2tbl_begin, $text2tbl_end)
{
    $mentionsKeep = (strpos($args, 'keeporiginal') !== false);
    if (!$mentionsKeep)
        $args = $args . " keeporiginal=0";
    $table = t2tText2Tbl($args, $tabledef, $text2tbl_begin, $text2tbl_end);
    return($table);
}