<?php if (!defined('PmWiki')) exit();
// The guard above stops execution unless the 'PmWiki' constant is defined.

// Record this recipe's version string under the 'wikipath' key of $RecipeInfo.
$RecipeInfo['wikipath']['Version'] = '1.1';
/*
 *  Author: Martin Fick
 *    Date: 11/28/2006
 *
 */


// Register the '{{wikipath}}' markup rule with '_begin' as its position
// argument.  The pattern matches {{<page>$<wikipath>}}: the text before the
// '$' is captured as $1 and the text after it as $2, and both are handed to
// wikipath_page() together with $pagename.
// NOTE(review): the pattern carries the PCRE /e (eval) modifier, which PHP
// removed in 7.0 -- confirm how the targeted PmWiki/PHP versions handle it.
Markup('{{wikipath}}','_begin', '/{{(.*?)\\$(.*?)}}/e',
      "wikipath_page(\$pagename, '\$1', '\$2')");


/*
   Get the page to which the wikipath refers, parse the wikipath
   and look it up by iterating over the page text, applying each
   molecule to it.

   Parameters:
     $pagename  the page passed in by the markup rule
     $page      the page to read; the empty string means $pagename
     $wikipath  the wikipath expression to evaluate

   Returns the selected lines joined with "\n".  When the global
   $WIKIPATH_DEBUG is set, the result is wrapped in debugging output
   (the parse trace and a dump of every molecule) instead.
*/
function wikipath_page($pagename, $page, $wikipath) {
  global $WIKIPATH_DEBUG;
  if ($page === '') $page = $pagename;

  // The whole page text starts out as one single section.
  $text = IncludeText($pagename, $page);
  $secs = array(explode("\n", $text));

  $out = '';    // molecule dump, only filled in debug mode
  $dout = '';   // parse trace, filled in by wikipath_parse()
  $false1 = FALSE; $false2 = FALSE;  // We don't want them linked
  $mols = wikipath_parse($wikipath, null, $false1, $false2, $dout);

  // Each molecule narrows down the sections left by the previous one.
  foreach ((array) $mols as $mol) {
    if ($WIKIPATH_DEBUG) $out .= wikipath_show_molecule($mol);
    $secs = wikipath_lookup_molecule($mol, $secs);
  }

  $br = $WIKIPATH_DEBUG ? "<br>" : "";
  $joined = array();
  foreach ((array) $secs as $sec) $joined[] = join("$br\n", (array) $sec);
  $rtn = join("$br\n", $joined);

  if ($WIKIPATH_DEBUG)
    return "'''[$wikipath]''' &nbsp;&nbsp;$dout<br><br>$out\n$rtn\n----\n<br/>";

  return $rtn;
}

/*
    This is for debugging, it prints out a molecule.

    A molecule is the smallest independent piece of a wikipath.  It is
    an individual working piece which starts with an element and is
    followed by optional IDs.  A molecule can be further subdivided,
    but the pieces are dependent upon each other and so they are more
    easily dealt with as a unit.  Molecules are represented like this:

   [] [] [LIKE]   or   [RANGE]              (optional first line)
      [] [SEC] [sec]   or   [] [EL] [el]    (optional first or second line)
      [] [ID TYPE] [id]
      ...
   [] ...

    Returns one "# [key] value" line per entry of the molecule, wrapped
    in "molecule { ... }".  Range IDs store the preg_match() result array
    as their value; for those the first element (the full matched text)
    is shown instead of the literal word "Array".
*/
function wikipath_show_molecule($mol) {
  $out1 = '';
  foreach ((array) $mol as $val) {
    foreach ((array) $val as $k => $v) {
      if (is_array($v)) $v = reset($v);   // show the matched text of a range
      $out1 .= "\n# [$k] $v";
    }
  }
  return "\n<br> molecule { $out1\n}";
}

/*
   This parses the wikipath expression and expresses it as an
   array of molecules so that it is easier to handle in other
   parts of the recipe.  One reason to split the parsing and
   lookup into two sections is because sometimes the parsing
   involves looking ahead.  This also somewhat isolates the
   grammar from the lookup implementation.

   The function consumes one token from the front of $wikipath and
   recurses on the remainder.

   Parameters:
     $wikipath     the (rest of the) wikipath expression
     $molecule     the molecule being built by the caller, or null
     $sec_after    (out) set to FALSE at the end of the path and to TRUE
                   after a section element has been handled
     $t_sec_after  (out) TRUE when the ID parsed by this call is directly
                   followed by the section terminator
     $dout         (out) human readable parse trace for debugging

   Returns the array of molecules, or null when the expression cannot
   be parsed.

   NOTE(review): $ID_L uses the range [A-z], which also covers the
   characters [ \ ] ^ _ and the backtick -- confirm whether that is
   intended before narrowing it.
*/
function wikipath_parse($wikipath, $molecule, &$sec_after, &$t_sec_after, & $dout) {

  $SEC_ELs= "([*!#>]|-&gt;|::|\\[#\\])";  // Section Elements
  $ELs = "[;:,.]";                    // Elements

  $T_SEC= "%";                        // Terminator for Sections
  $T_EL= "\\^";                       // Terminator for Elements
  $ID_N= "[0-9]+";                    // ID Numerical
  $ID_L= "[A-z]+";                    // ID Leading type
  $ID_RE="\\/.+?\\/";                 // ID of type Regular expression
  $SEC= "$SEC_ELs\\1*[+-]?";          // A section
  $ID_N_RG="($ID_N)-($ID_N)";         // A numerical range
  $ID_N_SRG="($ID_N)([+-])";          // A single ended numerical range
  $ID= "$ID_L|$ID_N_RG|$ID_N_SRG|$ID_N|$ID_RE"; // An ID


  $dout = "";
  $t_sec_after = FALSE; // Is a section terminator after an id of ours?
  // Loose comparison on purpose: older PHP versions return FALSE from
  // substr() once the string has been consumed completely.
  if ($wikipath == "") {
    $sec_after = FALSE;
    $dout = "END";
    return array($molecule);
  }

  //  Section Elements
  if (preg_match("/^$SEC/", $wikipath, $m)) {
    $sec_el = $m[0];
    $wprem = (string) substr($wikipath, strlen($sec_el));
    $out = "SEC_EL(''$sec_el'')";
    $newmolecule = array(array("SEC_EL" => $sec_el));
    $is_sec = "UNKNOWN";
    if (preg_match("/^$T_EL/", $wprem)) {
      $wprem = (string) substr($wprem, 1);
      $out .= " T_EL";
      $is_sec = FALSE;
    }
    if (preg_match("/^$T_SEC/", $wprem)) {
      $wprem = (string) substr($wprem, 1);
      $out .= " T_SEC";
      $is_sec = TRUE;
    }

    $compound = wikipath_parse($wprem, $newmolecule, $sec_after, $t_sec_after, $dout);
    // The rest of the path could not be parsed: give up on the whole path
    // instead of handing null to the array functions below.
    if (!is_array($compound)) {
      $dout = $out . " " . $dout;
      return null;
    }
    // $sec_after can be calculated here instead by looking at the
    // the rest of the molecules
    // $t_sec_after can be calculated here instead by looking at the
    // second molecule of $compound
    if ($t_sec_after) {
      if($molecule !== NULL) array_unshift($compound, $molecule);
      return $compound;
    }

    // Without an explicit terminator the element is a section exactly
    // when another section element follows later in the path.
    if ("UNKNOWN" === $is_sec) {
       $is_sec = $sec_after;
       if ($is_sec) $out .= " SEC_AFTER";
       else         $out .= " T_END";
    }
    $sec_after = TRUE;
    // Replace the provisional SEC_EL entry heading our molecule with the
    // final SEC or EL entry.
    $firstmol = array_shift($compound);
    $sec_el = array_shift($firstmol);
    if ($is_sec) {
      array_unshift($firstmol, array("SEC" => $sec_el["SEC_EL"]));
      $out = "SECTION{ $out }";
    }
    else {
      array_unshift($firstmol, array("EL" => $sec_el["SEC_EL"]));
      $out = "ELEMENT{ $out }";
    }
    array_unshift($compound, $firstmol);
    if($molecule !== NULL) array_unshift($compound, $molecule);

    $dout = $out . " " . $dout;
    return $compound;
  }

  // Plain elements
  if (preg_match("/^$ELs/", $wikipath, $m)) {
    $el = $m[0];
    $wprem = (string) substr($wikipath, strlen($el));
    $out = "EL(''$el'')";
    $newmolecule = array(array("EL" => $el));
    $compound = wikipath_parse($wprem, $newmolecule, $sec_after, $t_sec_after, $dout);
    if (!is_array($compound)) {
      $dout = $out . " " . $dout;
      return null;
    }
    if($molecule !== NULL) array_unshift($compound, $molecule);
    $dout = $out . " " . $dout;
    return $compound;
  }

  // IDs
  // The alternatives are grouped so that '^' anchors every one of them.
  // Ungrouped, only the first alternative was anchored, so an ID found
  // further along the string had its length applied at offset 0.
  if (preg_match("/^(?:$ID)/", $wikipath, $m)) {
    $w_id = $m[0];
    $wprem = (string) substr($wikipath, strlen($w_id));
    $out = "ID(''$w_id'')";

    // Classify the ID; ranges keep the whole match array as their value.
    $id = "ID";
    if (preg_match("/^$ID_L/", $w_id, $m))         $id = "ID_L";
    elseif (preg_match("/^$ID_N_RG/", $w_id, $m)) {
      $id = "ID_N_RG"; $w_id = $m;
    }
    elseif (preg_match("/^$ID_N_SRG/", $w_id, $m)) {
      $id = "ID_N_SRG"; $w_id = $m;
    }
    elseif (preg_match("/^$ID_N/", $w_id, $m))    $id = "ID_N";
    elseif (preg_match("/^$ID_RE/", $w_id, $m))   $id = "ID_RE";

    $molecule[] = array($id => $w_id);

    $t_sec = FALSE;
    if (preg_match("/^$T_SEC/", $wprem)) {
      $wprem = (string) substr($wprem, 1);
      $out .= " T_SEC ]";
      array_unshift($molecule, array("LIKE"=>"LIKE"));
      $t_sec = TRUE;
    }
    // The flag reported by the rest of the path is deliberately discarded:
    // only a terminator directly after this ID counts for our caller.
    $ignored_t_sec_after = FALSE;
    $compound = wikipath_parse($wprem, $molecule, $sec_after, $ignored_t_sec_after, $dout);
    if ($t_sec)  $t_sec_after = TRUE;
    $dout = "$out $dout";
    return $compound;
  }

  // Nothing matched: the expression is not a valid wikipath.
  $dout = "";
  return null;
}

/*
   This returns a regexp (without delimiters) used to identify the end
   of a section, or null when the section type has no such regexp.

   Only heading sections ("!", "!!", ... with an optional "+" or "-"
   suffix) have one: a section of N exclamation marks ends at the next
   line that starts with 1 to N exclamation marks.

   The regexp is built directly; the former preg_replace() call relied on
   the /e modifier, which no longer exists in PHP 7 and later.  The "-"
   suffix now yields the same regexp as the other forms; before, it passed
   the guard but not the replacement pattern, so $el itself was returned.
*/
function wikipath_end($el) {
  if (!preg_match('/^(!+)[-+]?$/', (string) $el, $m)) return null;

  $bang = preg_quote('!', '/');
   // Still need to ignore contents of tables
  return '^' . $bang . '{1,' . strlen($m[1]) . '}(?!' . $bang . ')';
}
/*
   This returns a regexp (without delimiters) indicating still being in
   a section, or null when the section type has no such regexp.

   Only list sections (runs of "#" and "*" with an optional "+" or "-"
   suffix) have one: for N list markers the regexp matches lines that
   start with N+1 list markers.

   The regexp is built directly; the former preg_replace() call relied on
   the /e modifier, which no longer exists in PHP 7 and later.
*/
function wikipath_notend($el) {
  if (!preg_match('/^([#*]+)[+-]?$/', (string) $el, $m)) return null;

  return '^[#*]{' . (strlen($m[1]) + 1) . '}';
}

/*
   This returns a regexp (without delimiters) used to identify the line
   on which an element or section begins, or null for elements that have
   no such regexp.

     ;        ''  (the empty regexp, which matches every line)
     :        a "name:" prefix
     ::       a ":name:" prefix
     XX       exactly the marker run XX, not followed by its last character
     XX+      the marker run XX, possibly followed by more markers
     XX-      1 to N repetitions of the last marker, N being the run length

   where XX is a run of the characters "#", "*" and "!".

   The regexps are built directly; the former preg_replace() calls relied
   on the /e modifier, which no longer exists in PHP 7 and later.
*/
function wikipath_beg($el) {
  if($el == ';')  return '';
  if($el == ':')  return '^ *[-_.A-z]+ *:';
  if($el == '::')  return '^ *: *[-_.A-Za-z]+ *:';

  // $m[1] = marker run, $m[2] = its last character, $m[3] = optional suffix
  if (!preg_match('/^(([#*!])+)([-+]?)$/', (string) $el, $m)) return null;

  $run  = preg_quote($m[1], '/');
  $last = preg_quote($m[2], '/');
  if ($m[3] === '+') return '^' . $run;
  if ($m[3] === '-')
    return '^' . $last . '{1,' . strlen($m[1]) . '}(?!' . $last . ')';
  return '^' . $run . '(?!' . $last . ')';
}

/*
   This returns the part of an item which will be used for IDs.
   Generally this means stripping off the element or section
   identifier.  Not the best name or description, sorry.

   Parameters:
     $el    the element or section type
     $item  a line, or an array of lines of which only the first is used

   For "::" the leading colon is removed.  For marker runs of "#", "*"
   and "!" all leading marker characters are removed; this now includes
   the forms with a "+" or "-" suffix, whose lines previously kept their
   markers and therefore could never match a leading-text ID.
*/
function wikipath_rm_el($el, $item) {
  if (is_array($item)) $item = isset($item[0]) ? $item[0] : '';
  if ($el == '::')  return preg_replace('/^ *:/', '', $item);
  if (preg_match('/^[#*!]+[-+]?$/', (string) $el))
    return preg_replace('/^[#*!]+/', '', $item);
  return $item;
}
/*
   This returns the part of an item that we actually want to capture.
   For the ":" and "::" elements the leading "name:" (respectively
   ":name:") prefix is cut off; every other item is returned untouched.
*/
function wikipath_capture($el, $item) {
  // element type => prefix to remove from the item
  $prefixes = array(
    ':'  => '/^ *[-_.A-z]+ *:/',
    '::' => '/^ *: *[-_.A-z]+ *:/',
  );
  foreach ($prefixes as $marker => $pattern) {
    if ($el == $marker) return preg_replace($pattern, '', $item);
  }
  return $item;
}
/*
   This filters a set by the ids in a molecule.

   Parameters:
     $mol   the molecule; its SEC/EL entries are skipped, every ID entry
            narrows the set further, in order
     $sets  list of items: lines or words (strings) or sections (arrays
            of lines)
     $el    the element or section type the items were selected with

   ID types:
     ID_L      keeps items whose text starts with the ID (for all
               elements but "." the element marker and leading blanks
               are ignored)
     ID_RE     keeps items matching the regexp; for a section the regexp
               is applied to its first line, like ID_L does
     ID_N      keeps the Nth item, counting from 1
     ID_N_RG   keeps items N through M
     ID_N_SRG  "N+" keeps items N onwards, "N-" keeps the first N items

   Returns the list of surviving items, each passed through
   wikipath_capture().  The list is empty when nothing survives.
*/
function wikipath_filter_set($mol, $sets, $el) {
  $sets = (array) $sets;

  foreach ((array) $mol as $val) {
    $k = null; $v = null;
    foreach ((array) $val as $k => $v) ; // load $k and $v with the last values

    if ($k === "SEC" || $k === "EL") continue;

    if ($k === "ID_L") {
      $out = array();
      foreach ($sets as $set) {
        if ($el == '.') $subject = $set;
        else $subject = preg_replace('/^ */', '', wikipath_rm_el($el, $set));
        if (substr($subject, 0, strlen($v)) === $v) $out[] = $set;
      }
      $sets = $out;
    } elseif ($k === "ID_RE") {
      $out = array();
      foreach ($sets as $set) {
        // preg_match() needs a string subject, a section is an array.
        $subject = is_array($set) ? (string) reset($set) : $set;
        if (preg_match($v, $subject)) $out[] = $set;
      }
      $sets = $out;
    } elseif ($k === "ID_N") {
      // IDs count from 1; anything outside 1..count selects nothing.
      $n = (int) $v;
      $sets = ($n >= 1) ? array_slice($sets, $n - 1, 1) : array();
    } elseif ($k === "ID_N_RG") {
      $first = max(0, (int) $v[1] - 1);
      $sets = array_slice($sets, $first, max(0, (int) $v[2] - $first));
    } elseif ($k === "ID_N_SRG") {
      if ($v[2] == "+")     $sets = array_slice($sets, max(0, (int) $v[1] - 1));
      elseif ($v[2] == "-") $sets = array_slice($sets, 0, max(0, (int) $v[1]));
      else                  $sets = array();
    }
  }

  $cap = array();
  foreach ($sets as $set) $cap[] = wikipath_capture($el, $set);
  return $cap;
}

/*
   This breaks sets up into sets of the elements we are looking for.

   Parameters:
     $secs  list of sections, each a list of lines
     $el    the element type

   For the "." element every section is turned into the list of its
   words.  Words are separated by whitespace; blank lines and leading or
   trailing blanks no longer produce empty words, which used to shift
   the numbering of the real words.

   For every other element each section is reduced to the lines matching
   the regexp from wikipath_beg(); sections without a matching line are
   dropped.  Elements without a regexp yield an empty list.
*/
function wikipath_getels($secs, $el) {
  $out = array();

  if ($el == '.') {
    foreach ((array) $secs as $sec) {
      $wds = array();
      foreach ((array) $sec as $line)
        foreach (preg_split('/\s+/', $line, -1, PREG_SPLIT_NO_EMPTY) as $wd)
          $wds[] = $wd;
      $out[] = $wds;
    }
    return $out;
  }

  $beg = wikipath_beg($el);
  if ($beg === null) return $out;

  foreach ((array) $secs as $sec) {
    $olines = array();
    foreach ((array) $sec as $line)
      if (preg_match("/$beg/", $line)) $olines[] = $line;
    if (count($olines)) $out[] = $olines;
  }
  return $out;
}

/*
   This breaks sets up into the sections we are looking for.

   Parameters:
     $secs  list of sections, each a list of lines
     $el    the section type

   A section starts at a line matching the regexp from wikipath_beg()
   and lasts until a line either matches the regexp from wikipath_end()
   or fails to match the one from wikipath_notend(), whichever of the
   two exists for $el.  The line that ends a section may start the next
   one.

   Returns the list of sections found, each a list of lines; the list is
   empty when $el has no begin regexp.

   NOTE(review): the scan state is not reset between the entries of
   $secs, so a section still open at the end of one entry carries on
   into the next -- confirm whether that is intended.
*/
function wikipath_getsecs($secs, $el) {
  $out = array();
  $beg = wikipath_beg($el);
  $end = wikipath_end($el);
  $notend = wikipath_notend($el);
  if ($beg === null) return $out;

  $in_section = FALSE;
  $olines = array();
  foreach ((array) $secs as $sec) {
    foreach ((array) $sec as $line) {
      if ($in_section) {
        $hit_end  = $end != null    &&   preg_match("/$end/", $line);
        $left_run = $notend != null && ! preg_match("/$notend/", $line);
        if ($hit_end || $left_run) {
          // Close the current section; this line may open the next one.
          $in_section = FALSE;
          $out[] = $olines;
          $olines = array();
        }
      }
      if (! $in_section && preg_match("/$beg/", $line))
        $in_section = TRUE;
      if ($in_section) $olines[] = $line;
    }
  }
  if (count($olines)) $out[] = $olines;
  return $out;
}

/*
   Looking up a molecule in an array of sections.

   Parameters:
     $mol   the molecule to apply
     $secs  list of sections, each a list of lines

   The first SEC or EL entry of the molecule decides what happens:

     SEC  $secs is split into sections of that type, which are then
          filtered by the IDs of the molecule.
     EL   the elements of that type are taken from every section and
          filtered by the IDs of the molecule; every surviving element
          becomes a section of its own.

   Returns the resulting list of sections; the list is empty when the
   molecule has neither a SEC nor an EL entry.
*/
function wikipath_lookup_molecule($mol, $secs) {
  foreach ((array) $mol as $val) {
    $k = null; $v = null;
    foreach ((array) $val as $k => $v) ; // load $k and $v with the last values

    if ($k === "SEC")
      return (array) wikipath_filter_set($mol, wikipath_getsecs($secs, $v), $v);

    if ($k === "EL") {
      // Start from an empty array: array_merge() rejects a null argument.
      $items = array();
      foreach ((array) wikipath_getels($secs, $v) as $sec)
        $items = array_merge($items, (array) wikipath_filter_set($mol, $sec, $v));

      $out = array();
      foreach ($items as $item) $out[] = array($item);
      return $out;
    }
  }
  return array();
}