'lupus'); # 3. Include this script in "config.php" and call your wiki with # "?action=phpwikic": # include_once("$FarmD/cookbook/phpwikiconv.php"); # # Notes: # 1. Files in $OutWikiDir will be overwritten! # 2. WikiWords will be disabled. # 3. No PhpWiki plugins are converted (extend the $PWC_MarkupConversions array # to convert them). # 4. Only Old Style and Definition Style tables are supported. # 5. Definition style table conversion might be quirky. Moreover, tables within # tables and spanning is not supported for these. # 6. There are 3 arrays that control the behaviour of markup conversion: # * main conversion happens for markup specified in $PWC_MarkupConversions, # * $PWC_MarkupConversionsPre might be used for things requiring a 2 step conversion, # * $PWC_MarkupConversionsPost might be used to change the resulting markup # (MarkupRestore() is called before doing these) # 7. One can create a $PWC_MarkupTestString variable and put some PhpWiki markup in # it, and run PmWiki with ?action=phpwikit to test markup conversion. # # History: # 2010-06-20: found in the attic, published. # 2010-01: added some comments, a description. # 2009-spring: created for a one-time convertion. SDV($InWikiDir,'phpwiki.in'); SDV($OutWikiDir,'phpwiki.out'); SDV($InEncoding,'windows-1252'); SDV($OutEncoding,'utf-8'); # $SkipPageFilenames shall specify which PhpWiki pagenames should be skipped # during the conversion (the argument is passed to MatchPageNames() PmWiki # function). 
SDV($SkipPageFilenames,'PhpWiki*'); # Converted pages will be put to this group # (except the exceptions defined in $PWC_PageNameConversions) SDV($OutGroup,'PhpWiki'); # A facility to rename pages, or put them to a group other than $OutGroup SDVA($PWC_PageNameConversions, array( # example: '/^(John|Judas)$/' => 'Profiles.$1' )); SDV($PHPWikiWikiWordPattern, '(?#\[([A-Za-z][-\w:.]*)\])/e' => "Keep('$1[[#$1]]')" ## #[|foo]: An empty anchor with id "foo" ## #[howdy|foo]: An anchor around the text "howdy" with id "foo" ,'/(?>#\[([^\|\]]*)\|([A-Za-z][-\w:.]+)\])/e' => "Keep('$1'=='' ? '[[#$2]]' : '$1[[#$2]]')" ## image links [http://site.com/xxx.jpg|bla bla] ,"#t?\[\ *((?:https?|ftp)://(?:[^\s$UrlExcludeChars]*))\.(jpg|png|gif)\ *(?:\\n\|{1,2} | \|{1,2}\\n | \| |) ([^\[\]|\\n]*) \]#xie" => "pwc_convertImageLinks('$1', '$2', '$3')" ## apply styling to link text before Keep() #,'!\[\ *_([^\|\]\n_]+)_!' => "[''$1''" # emphasis, pmwiki ,'!\[\ *_([^\|\]\n_]+)_!' => "[//$1//" # emphasis, creole ##,'!\[\ *\*([^\|\]\n*]+)\*!' => "['''$1'''" # strong, pmwiki ,'!\[\ *\*([^\|\]\n*]+)\*!' => "[**$1**" # strong, creole ## links [bla bla | local/url link] ,'!\[ ([^\|\]\n]+) \| \s* ([^\[\]]+) \]!xe' => "Keep('[[$2 | $1]]')" ## [free links] ,'!(?>\[\s*(.+?)\ *\])!e' => "Keep('[[$1]]')" ## bare links ,"#\b(?:https?|ftp)://[^\s$UrlExcludeChars]*[^\s.,?!$UrlExcludeChars]#e" => "Keep('$0')" ## old style tables ,'!<\?plugin OldStyleTable\s*(.+?)\?>!se' => "pwc_OldStyleTableConvert('$1', \$pagename)" ## get rid of WikiWords #,"/(~*)($PHPWikiWikiWordPattern)/e" => "('$1'==''||strlen('$1')%2==0) ? Keep('[[$2]]') : '$2'" ,"/(~*)($PHPWikiWikiWordPattern)/e" => "('$1'==''||strlen('$1')%2==0) ? '[['.AsSpaced('$2').']]' : '$2'" ## get rid of WikiWord escaping ~ ,'/~~/' => '~' ## bold ,'!\*(.*?)\*!' => "'''$1'''" # pmwiki ,'!(.*?)!' => "'''$1'''" # pmwiki #,'!\*(.*?)\*!' => "**$1**" # creole #,'!(.*?)!' => "**$1**" # creole ## italics ,'!_(.*?)_!' => "''$1''" # pmwiki ,'!(.*?)!' 
=> "''$1''" # pmwiki #,'!_(.*?)_!' => "//$1//" # creole #,'!(.*?)!' => "//$1//" # creole ## bold italics ,'!(?:_\*|\*_)(.+?)(?:_\*|\*_)!' => "'''''$1'''''" # pmwiki #,'!(?:_\*|\*_)(.+?)(?:_\*|\*_)!' => "//**$1**//" # creole ## fixed width ,'!=(.+?)=!' => '@@\1@@' ## headings ,'/^(!{1,3})\s?(.*)$/me' => "'$1'=='!' ? '!!! $2' : ('$1'=='!!' ? '!!! $2' : '!! $2' )" ## line breaks ,'/(?/' => "\\\\\\\\\n" ## Definition list style tables ,'/(^[^|\n]*\| *(?:\n(?: *\n)*(?: *[^|\n ][^|\n]*\|?|[^|\n]*\|) *)+)/me' => "pwc_DefStyleTableConvert('$1', \$pagename)" )); SDV($PWC_MarkupConversionsPost, array()); SDV($PHPWikiConvertAction,'phpwikic'); SDV($PHPWikiMarkupTestAction,'phpwikit'); SDV($HandleActions[$PHPWikiConvertAction],'HandlePHPWikiMigrate'); SDV($HandleActions[$PHPWikiMarkupTestAction],'HandlePHPWikiMarkupTest'); if (!is_dir($OutWikiDir) || !is_writable($OutWikiDir)) { echo "OutWikiDir ($OutWikiDir) must be a writable directory!"; exit; } array_unshift($WikiLibDirs, new PageStore($OutWikiDir.'/{$FullName}', 1)); $PWC_MarkupConversions = array_diff_key($PWC_MarkupConversions,$PWC_MarkupConversionsPre,$PWC_MarkupConversionsPost); function HandlePHPWikiMigrate($pagename) { global $FarmD , $InWikiDir, $OutWikiDir , $InEncoding, $OutEncoding , $OutGroup , $PWC_MarkupConversionsPre, $PWC_MarkupConversions, $PWC_MarkupConversionsPost , $PWC_PageNameConversions , $SkipPageFilenames; header('Content-Type: text/html; charset=UTF-8'); iconv_set_encoding('internal_encoding', 'utf-8'); include_once($FarmD.'/scripts/xlpage-utf-8.php'); $infiles = glob("$InWikiDir/*"); if (empty($infiles)) { echo "No files found for $InWikiDir/*\n"; exit; } foreach ($infiles as $k=>$f) { unset($infiles[$k]); $infiles[basename($f)] = $f; } $skippages = MatchPageNames(array_keys($infiles), $SkipPageFilenames); $converted = 0; $skipped = 0; foreach ($infiles as $pagefile=>$pagepath) { if (in_array($pagefile, $skippages)) { $skipped++; pwc_show("match with SkipPageFilenames...", 'skip', $pagefile); 
continue; }

    ## ---- Per-file section of the HandlePHPWikiMigrate() main loop ----
    ## Reads one PhpWiki dump file, splits its MIME-style header block from the
    ## page text, and translates the "key=value" attributes found in the
    ## Content-Type header into PmWiki page attributes ($page) and a target
    ## $pagename. Every failure is reported via pwc_show(..., 'error', ...) and
    ## the file is skipped with `continue`.

    pwc_show('Processing file '.$pagepath."...");
    if (!$inpagestr=file_get_contents($pagepath)) {
      pwc_show("couldn't read it!", 'error', $pagepath);
      continue;
    }
    $pagefile=basename($pagepath);

    # Recode to the output encoding and normalise line endings to "\n".
    $inpagestr=iconv($InEncoding, $OutEncoding, $inpagestr);
    $inpagestr=str_replace("\r", '', $inpagestr);

    # The header block ends at the first blank line. A missing separator is
    # handled explicitly instead of relying on substr($s, 0, false) yielding ''.
    $sep=strpos($inpagestr, "\n\n");
    if ($sep===false) {
      $pageattr='';
      $pagetext='';
    } else {
      $pageattr=substr($inpagestr, 0, $sep);
      $pagetext=substr($inpagestr, $sep+2);
    }
    if (empty($pageattr) || empty($pagetext)) {
      pwc_show("couldn't separate headers from text!", 'error', $pagefile);
      continue;
    }
    # The header block is expected to end with this exact 33-character string.
    if (substr($pageattr, -33)!="Content-Transfer-Encoding: binary") {
      pwc_show("page attributes ended with an unexpected string", 'error', $pagefile);
      continue;
    }

    $mime=iconv_mime_decode_headers($pageattr);
    if (isset($mime['Content-Type'])) {
      # The page attributes are ';'-separated, urlencoded "key=value" pairs
      # inside the Content-Type header.
      $mime=$mime['Content-Type'];
      $mime=iconv($InEncoding, $OutEncoding, urldecode($mime));
      $mime=explode(';', $mime);
      $page = array();
      $pagename='';
      foreach ($mime as $m) {
        $m=explode('=', trim($m), 2);
        if (count($m)!=2) continue;   # not a key=value pair (e.g. the media type itself)
        list ($k, $v) = $m;
        switch ($k) {
          case 'pagename':
            # PmWiki wants pagenames to start with lowercase letters and have no spaces nor slashes...
            $pn = $pn0 = urldecode($v);
            if (!empty($PWC_PageNameConversions)) {
              $pn = preg_replace(array_keys($PWC_PageNameConversions), array_values($PWC_PageNameConversions), $pn, -1, $rcount);
              if ($rcount > 0) pwc_show("$pn0 has been renamed to $pn", 'rename', $pagefile);
            }
            else $pn = "$OutGroup.$pn";
            # NOTE(review): MakePageName() is a PmWiki core function not visible
            # here; presumably it resolves $pn relative to the given base page.
            $pagename=MakePageName("$OutGroup.$OutGroup", $pn);
            if (empty($pagename)) pwc_show("couldn't convert pagename $pn", 'error', $pagefile);
            else pwc_show("\tpagename: $pagename");
            break;
          case 'flags':
            if ($v!='""' && $v!='PAGE_LOCKED') pwc_show("unknown flags found: $v ", 'error', $pagefile);
            break;
          case 'author':
            if (trim($v)=='The PhpWiki programming team') {
              $skipped++;
              pwc_show("author string indicates this an untouched sys page...", 'skip', $pagefile);
              continue 3;   # switch + attribute loop + file loop: go to the next file
            }
            $page['author']=$v;
            pwc_show("\tauthor: $v ");
            break;
          case 'version':
            $page['rev']=$v;
            break;
          case 'lastmodified':
            $page['time']=$v;
            break;
          case 'created':
            $page['ctime']=$v;
            break;
          case 'author_id':
            # BUGFIX: this used to read long2ip(ip2long($v)==$v), i.e. the
            # comparison result was passed to long2ip(). That always produced a
            # non-empty string, so every author_id was stored as 'host' and the
            # author mismatch check below was unreachable. The intended test is
            # an ip2long()/long2ip() round trip.
            $ipnum=ip2long($v);
            if ($ipnum!==false && long2ip($ipnum)==$v) $page['host']=$v;
            else {
              # 'author' may not have been seen yet; avoid an undefined index.
              $author=isset($page['author']) ? $page['author'] : '';
              if ($author!=$v) pwc_show("author_id!=author ($v!=$author)", 'error', $pagefile);
            }
            break;
          case 'markup':
            if ($v!=2) pwc_show("markup!=2", 'error', $pagefile);
            break;
          case 'summary':
            $page['csum']=$v;
            break;
          case 'charset':
            # The text has already been recoded, so the stored charset is fixed.
            $page['charset'] = 'UTF-8';
            if ($v!='iso-8859-1') pwc_show("charset!=iso-8859-1 ($v)", 'error', $pagefile);
            break;
          case 'hits':
          case 'acl':
            break;   # deliberately ignored
          default:
            pwc_show("unknown key: $k ($v)", 'error', $pagefile);
        }
      }
      if (empty($pagename)) {
        pwc_show("couldn't find pagename attribute!", 'error', $pagefile);
        continue;
      }
      if (empty($page['author'])) {
        pwc_show("couldn't find author attribute!", 'error', $pagefile);
        continue;
      }
    }
    else {
      pwc_show("couldn't process page headers!", 'error', $pagefile);
      continue;
    }

    # Markup conversion: optional Pre pass, then the main pass
    # (the main preg_replace() call continues on the next source line).
    if (!empty($PWC_MarkupConversionsPre))
      $pagetext=preg_replace(array_keys($PWC_MarkupConversionsPre), array_values($PWC_MarkupConversionsPre), $pagetext);

    $pagetext=preg_replace(array_keys($PWC_MarkupConversions), 
array_values($PWC_MarkupConversions), $pagetext);
    # MarkupRestore() runs before the Post conversions, so those operate on
    # the final markup.
    $pagetext=MarkupRestore($pagetext);

    if (!empty($PWC_MarkupConversionsPost))
      $pagetext=preg_replace(array_keys($PWC_MarkupConversionsPost), array_values($PWC_MarkupConversionsPost), $pagetext);

    # Assemble the page: attributes sorted by key, with the 'version' line first.
    $page['text'] = $pagetext;
    $page['name'] = $pagename;
    ksort($page);
    $page=array_merge(array('version'=>'pmwiki-2.2.0 ordered=1 urlencoded=1'), $page);
    if (!pwc_write($pagename, $page)) pwc_show("couldn't save the converted page", 'error', $pagefile);
    else $converted++;
  }

  # Summary: totals first, then the collected message lists.
  pwc_show("total files considered: ".count($infiles), '');
  pwc_show("total converted: $converted", '');
  pwc_show('', 'errors');
  pwc_show('', 'skips');
  pwc_show('', 'renames');
  pwc_show('', 'infos');
  exit;
}

## Formats and display messages
##
## $type selects what happens with $msg:
##   'info', 'error', 'skip', 'rename'
##       append "$msg" (plus " ($context)" when $context is non-empty) to the
##       matching static list; nothing is printed;
##   'infos', 'errors', 'skips', 'renames'
##       print the matching list with a "<type> (<n> lines)" heading
##       ($msg and $context are ignored);
##   anything else (the caller above passes '')
##       print $msg, and $context in parentheses, immediately.
##
## NOTE(review): several literals below consist of a newline only and
## `$msg = "$msg"` is a no-op apart from a string cast. As the handlers send a
## text/html Content-Type, HTML wrapper tags may have been lost from these
## strings -- confirm against the upstream recipe.
function pwc_show($msg, $type='info', $context='') {
  static $errors=array(), $infos=array(), $skips=array(), $renames=array();

  switch ($type) {
    case 'info':
      $infos[]=empty($context)?"$msg\n":"$msg ($context)\n";
      break;
    case 'error':
      $msg = "$msg";
      if (!empty($context)) $msg="$msg ($context)";
      $errors[]="$msg\n";
      break;
    case 'skip':
    case 'rename':
      # variable-variable: 'skip' -> $skips, 'rename' -> $renames
      ${$type.'s'}[]=empty($context)?"$msg\n":"$msg ($context)\n";
      break;
    case 'errors':
    case 'infos':
    case 'skips':
    case 'renames':
      # $type is the name of the static list to dump
      $count=count(${$type});
      echo "\n";
      echo "\n$type ($count lines)\n=======================\n"
        .implode('', ${$type});
      echo "\n";
      break;
    default:
      echo "\n".print_r($msg, true);
      if (!empty($context)) echo '('.print_r($context, true).')';
      echo "\n\n";
  }
}

## Gets called by ?action=phpwikit (useful for testing markup conversion)
##
## Runs $PWC_MarkupTestString through the Pre / main / Post conversions and
## prints the HTML-escaped text before and after, then exits.
function HandlePHPWikiMarkupTest($pagename) {
  global $PWC_MarkupTestString, $PWC_MarkupConversionsPre, $PWC_MarkupConversions, $PWC_MarkupConversionsPost;

  header('Content-Type: text/html; charset=UTF-8');
  $text=$PWC_MarkupTestString;
  echo "
before conversion:\n"
    .print_r(htmlspecialchars($text),true);
  
  # Optional first pass, for markup that needs a two-step conversion.
  if (!empty($PWC_MarkupConversionsPre))
    $text=preg_replace(array_keys($PWC_MarkupConversionsPre), array_values($PWC_MarkupConversionsPre), $text);
  
  # Main pass, followed by MarkupRestore() so that the Post pass below sees
  # the final markup.
  # NOTE(review): MarkupRestore() is a PmWiki core function not visible here;
  # presumably it puts back the text protected by Keep() -- confirm.
  $text=preg_replace(array_keys($PWC_MarkupConversions), array_values($PWC_MarkupConversions), $text);
  $text=MarkupRestore($text);

  # Optional last pass over the resulting markup.
  if (!empty($PWC_MarkupConversionsPost))
    $text=preg_replace(array_keys($PWC_MarkupConversionsPost), array_values($PWC_MarkupConversionsPost), $text);

  # Show the converted text, HTML-escaped like the input above.
  echo "\n\n\nafter conversion:\n";
  echo htmlspecialchars($text);
  echo '
';
  exit;
}

## Writes pages to disk
##
## Serialises $page as "key=value" lines into "$OutWikiDir/$pagename",
## overwriting any existing file. In the 'text' value '%', newline and '<'
## are percent-encoded; other values are written as they are.
## Returns true when the file was written, false otherwise.
function pwc_write($pagename, $page) {
  global $OutWikiDir;

  $str='';
  foreach ($page as $k=>$v) {
    # strict comparison: a numeric key must never be mistaken for 'text'
    if ($k==='text')
      $v=str_replace(array('%', "\n", '<'), array('%25', '%0a', '%3c'), $v);
    $str.="$k=$v\n";
  }

  # file_put_contents() returns the byte count or false; test for false
  # explicitly instead of relying on the count being non-zero.
  return file_put_contents("$OutWikiDir/$pagename", $str)!==false;
}

## Gets called when converting image links
## (a hook should be added here to enable conversion to gallery recipes).
##
## $link is the URL without its extension, $ext the extension (lower-cased
## here) and $title the optional link text. Returns the Keep()-protected URL,
## followed by "title" in double quotes when a title is given.
function pwc_convertImageLinks($link, $ext, $title) {
  $link.=strtolower(".$ext");

  # The two lines below could be used to convert some links to Mini: markup
  #if (preg_match('!/phpwiki/files!', $link))
  #  return Keep('Mini:'.basename($link).((string)$title===''?'':'"'.$title.'"'));

  # compare against '' rather than using empty(), so a title of "0" is kept
  if ((string)$title!=='') $link.='"'.$title.'"';
  return Keep($link);
}

## Gets called when converting old style tables
##
## $table is the body of a PhpWiki OldStyleTable plugin call. Each non-blank
## line is trimmed and rewritten with $OldStyleTableColPattern /
## $OldStyleTableColReplacement (both can be preset by the administrator);
## the rows are then joined under a "|| class=border" line.
## $pagename is accepted for the caller's sake but not used.
##
## NOTE(review): the default pattern relies on the /e (PREG_REPLACE_EVAL)
## modifier, which evaluates matched page text as PHP code and is no longer
## available as of PHP 7. It is left as it is because overrides of the two
## globals are written in the same pattern/eval-string form; a move to
## preg_replace_callback() has to change both together.
function pwc_OldStyleTableConvert($table, $pagename) {
  global $OldStyleTableColPattern, $OldStyleTableColReplacement;

  SDV($OldStyleTableColPattern, '/\s*\|(v+|>|<|\^|)\s*([^\|$]*)/e');
  SDV($OldStyleTableColReplacement,
    "'||'.(('$2'=='') ? '__' : ( (('$1'=='>' || '$1'=='^') ? ' ' : '') .'$2' .(('$1'=='<' || '$1'=='^') ? ' ' : (substr('$1',0,1)=='v'?str_repeat('+',strlen('$1')+1):'')) ))"
  );

  # Keep the trimmed, non-blank rows. Comparing with '' (instead of empty())
  # keeps a row whose whole content is "0".
  $rows=array();
  foreach (explode("\n", $table) as $row) {
    $row=trim($row);
    if ($row!=='') $rows[]=$row;
  }

  $rows=preg_replace($OldStyleTableColPattern, $OldStyleTableColReplacement, $rows);
  return ("\n|| class=border\n".implode("||\n", $rows)."||\n");
}

## Gets called when converting definition style tables
##
## Every non-blank line of $table becomes one cell:
##  * a line with no leading space that ends with '|' opens a new row
##    ("(:cellnr:)"),
##  * any other line adds a cell to the current row ("(:cell:)").
## The cell text is the line with surrounding '|', spaces and newlines removed.
## Spanning is not produced. The earlier implementation also tracked indent,
## column counts and colspan/rowspan slots, but none of those influenced the
## output, so that bookkeeping has been dropped.
## $pagename is accepted for the caller's sake but not used.
function pwc_DefStyleTableConvert($table, $pagename) {
  #return "\n<--------------------START--->\n$table\n<---END------------------------>\n";

  $tablestr = '';
  foreach (explode("\n", $table) as $line) {
    if (trim($line)==='') continue;   # blank lines carry no cell

    # $line is non-empty here, so $line[0] is always a valid offset
    $newrow = ($line[0]!==' ' && substr($line, -1)==='|');

    $tablestr .= ($newrow ? '(:cellnr' : '(:cell');
    $tablestr .= ':) '.trim($line, "| \n")."\n";
  }

  return "(:table class=border:)\n$tablestr(:tableend:)";
}