10, 'OldCharset' => 'Windows-1252', 'TempSuffux' => '-temp-utf8', 'BatchCheckTime' => 20, )); SDVA($MigrateUTF8['charsets'], array( 'Windows-1252' => 'Latin', 'ISO-8858-1' => 'Latin', 'ISO-8858-15' => 'Latin+Euro', 'ISO-8858-2' => 'Central Europe', 'ISO-8858-9' => 'Turkish', 'ISO-8858-13' => 'Baltic', 'Windows-1251' => 'Cyrillic', 'ISO-8858-5' => 'Cyrillic', )); SDVA($HandleActions, array('migr8'=>'HandleMigrateUTF8')); $MarkupDirectiveFunctions['migrate-charsets'] = 'FmtMigrateCharsets'; function FmtMigrateCharsets($pagename) { global $MigrateUTF8; $out = ''; foreach($MigrateUTF8['charsets'] as $charset => $label) { $c = PHSC($charset, ENT_QUOTES); $l = PHSC($label, ENT_QUOTES); $out .= "(:input select oldcharset \"$c\" \"$c ($l)\":)"; } return $out; } function HandleMigrateUTF8($pagename) { global $Now, $EnableReadOnly, $Charset, $SiteAdminGroup, $MigrateUTF8, $WorkDir, $UploadDir, $PageIndexFile, $MessagesFmt, $EnableMigratePartial, $EnableMigrateHasUploads, $PageStartFmt, $PageEndFmt, $HTMLStylesFmt, $EnableMigrateDone; $pn = "$SiteAdminGroup.$SiteAdminGroup"; $page = RetrieveAuthPage($pn, 'admin', true, READPAGE_CURRENT); if(!$page) return Abort('?No permissions.'); $todo = ""; if(!$EnableReadOnly) $todo .= "@@$EnableReadOnly = 1;@@\\\\\n"; if($Charset != 'UTF-8') $todo .= "@@include_once(\"$FarmD/scripts/xlpage-utf-8.php\");@@\n"; if($todo) { $out = <<>hlt php indent<< $todo >><< Then [[$pagename?action=migr8|refresh this page]]. EOF; return MigratePrintFmt($pagename, $out); } if(is_dir("$WorkDir,old") || is_dir("$UploadDir,old")) { $out = <<=$endtime) { $EnableMigratePartial = 1; break; } $d = array_shift($dirs); $ed = mig_hlt_new($d); mkdirp($d); $done .= "* Created directory \"$ed\"\n"; if(@$MigrateUTF8['TestPause']) sleep($MigrateUTF8['TestPause']); } $wdlen = strlen($WorkDir); $newindex = "$WorkDir$suffix" . 
substr($PageIndexFile, $wdlen); if(time()<$endtime && !file_exists($newindex) && file_exists($PageIndexFile)) { $ofp = fopen($newindex, 'w'); $ifp = @fopen($PageIndexFile, 'r'); if ($ifp) { while (!feof($ifp)) { $line = fgets($ifp, 4096); while (substr($line, -1, 1) != "\n" && !feof($ifp)) $line .= fgets($ifp, 4096); $line = pm_recode($line, $oldcharset, 'UTF-8'); fputs($ofp, $line); } fclose($ifp); } fclose($ofp); fixperms($newindex); $done .= "* Recoded $PageIndexFile=>$newindex\n"; } foreach($files as $source=>$dest) { if(++$i%$bct == 0 && time()>=$endtime) { $EnableMigratePartial = 1; break; } $esource = mig_hlt_old($source); $edest = mig_hlt_new($dest); if(substr($source, -9) == '.htaccess') { if(copy($source, $dest)) { fixperms($dest); $done .= "* Copied \"$esource\"=>\"$edest\"\n"; unset($files[$source]); } else { $done .= "* %item red% Could not copy \"$esource\"=>\"$edest\"\n"; } } elseif(rename($source, $dest)) { $done .= "* Moved \"$esource\"=>\"$edest\"\n"; if($dofixperms) fixperms($dest); unset($files[$source]); } else { $done .= "* %item red% Could not move \"$esource\"=>\"$edest\"\n"; } if(@$MigrateUTF8['TestPause']) sleep($MigrateUTF8['TestPause']); } if(count($files) == 0) { $words = $dirs = array(); rename($WorkDir, "$WorkDir,old"); rename($UploadDir, "$UploadDir,old"); rename("$WorkDir$suffix", $WorkDir); rename("$UploadDir$suffix", $UploadDir); $done .= "* Old '$WorkDir' moved to %red%'$WorkDir,old'%%\n"; $done .= "* Old '$UploadDir' moved to %red%'$UploadDir,old'%%\n"; $done .= "* New '$WorkDir$suffix' moved to %green%'$WorkDir'%%\n"; $done .= "* New '$UploadDir$suffix' moved to %green%'$UploadDir'%%\n"; if ($LastModFile && !@touch($LastModFile)) { unlink($LastModFile); touch($LastModFile); fixperms($LastModFile); } $EnableMigrateDone = 1; } } $tableheaders = "|| class=\"simpletable sortable filterable\"\n||!Old ||!New ||\n"; $tablerenamed = $tablemoved = $tabledirs = $tablewords = ''; foreach($words as $old => $new) { $markedold = 
mig_hlt_old($old); $markednew = mig_hlt_new($new); $tablewords.= "||@@$markedold@@ ||@@$markednew@@ ||\n"; } if($tablewords) $tablewords = "$tableheaders$tablewords"; else $tablewords = "Nothing to transpose."; $newdirs = ""; foreach($dirs as $d) { $d = mig_hlt_new($d); $newdirs .= "# $d\n"; } if($newdirs) { $cntdirs = count($dirs); $newdirs = "(:details summary='$cntdirs new directories to be created':)\n$newdirs\n(:detailsend:)\n"; } foreach($files as $old => $new) { if(preg_match('/[\\x80-\\xff]+/', $old)) { $markedold = mig_hlt_old($old); $markednew = mig_hlt_new($new); $tablerenamed .= "||@@$markedold@@ ||@@$markednew@@ ||\n"; $cntrenamed++; } else { $tablemoved .= "||@@$old@@ ||@@$new@@ ||\n"; $cntmoved ++; } } if($tablerenamed) { $tablerenamed = "(:details summary='$cntrenamed files with international filenames to be renamed (recoded)':)\n$tableheaders$tablerenamed\n(:detailsend:)\n"; } if($tablemoved) { $tablemoved = "(:details summary='$cntmoved files with ASCII filenames to be moved (no recode)':)\n$tableheaders$tablemoved\n(:detailsend:)\n"; } $out = <<>hlt php<< $UploadNameChars = "-\\\\p{L}\\\\p{N}_. "; $MakeUploadNamePatterns = array( "/[^$UploadNameChars]/u" => '', '/(\\\\.[^.]*)$/' => 'cb_tolower', '/^[^_\\\\p{L}\\\\p{N}]+/u' => '', '/[^_\\\\p{L}\\\\p{N}]+$/u' => '' ); >><< See also PmWiki:UTF-8. (:if2end:) (:div2end:) (:else:) (:if2 enabled EnableMigratePartial:) (:div2 class=frame:) !!! Partial migration $done '''Migration paused at configured time limit. Please review and resubmit the form below to continue.''' (:div2end:) (:if2:) This function will migrate a wiki content from an old 8-bit encoding to UTF-8. The semi-automated function will perform these steps: # Temporary directories will be created for wiki page files ($WorkDir%green%$suffix%%) and attachments ($UploadDir%green%$suffix%%). # $PageIndexFile will be recoded to UTF-8. 
# All page files and attachments will be moved to the temporary directories, recoding their filenames where necessary. PmWiki cache files are skipped. ** If the processing takes more than {$MigrateUTF8['MaxSeconds']} seconds, it will stop, and you will need to repost the form below to continue from where it stopped. # The old directories will be renamed to "$WorkDir%red%,old%%" and "$UploadDir%red%,old%%" and the temporary directories will be moved to their names. Note, this only recodes the directory names, and filenames of page files and attachments. Normally PmWiki should recode page content automatically (text, metadata, and history). !! Preview recoding of filenames Please preview unique words to be replaced: $tablewords $newdirs $tablerenamed $tablemoved Please preview the above replacements. If you see any errors, you should try different source encodings in the form below, and press "Preview", until you find the correct one. (:input form action="{\$PageUrl}?action=migr8" method=post class=frame:) (:input hidden n $pagename:)(:input hidden action migr8:) (:input default request=1:) !! Control panel (:notoc:) !!!! Select source encoding: (:migrate-charsets:)   (:input submit preview Preview formnovalidate=formnovalidate:) \\\\ If some or all of the above recoded samples are wrong, please try different encodings. (:input checkbox fixperms 1 "Fix file permissions after rename/move (slower, only enable if evidence for inaccessible pages or files)":) !!!! 
Please confirm these statements to continue: (:input checkbox have_backups 1 "I have a recent backup snapshot in case something breaks and the wiki needs to be reinstalled" required=required:) (:input checkbox read_docs 1 "I have read the documentation at" required=required:) Cookbook:MigrateUTF8 (:input checkbox seen_preview 1 "I have reviewed the recoded strings above and they are correct" required=required:) (:input submit recode "Recode (cannot be undone)" data-pmconfirm="This cannot be undone, except from your backups. Proceed with recoding?":) (:input end:) (:ifend:) EOF; MigratePrintFmt($pagename, $out); } function mig_hlt_old($str) { return preg_replace_callback('/[\\x80-\\xff]+/', "cb_markintl", $str); } function mig_hlt_new($new) { return preg_replace('/[\\x80-\\xff]+/', "%green bgcolor=yellow%$0%%", $new); } function MigratePrintFmt($pagename, $out) { global $PageStartFmt, $PageEndFmt; DisableSkinParts('Header Footer Left Right Title'); $markup = "markup:(:messages:) ! Migrating {\$WikiTitle} to UTF-8 Please review the documentation at Cookbook:MigrateUTF8 before using this tool. ---- $out ---- Please contact me at Cookbook:MigrateUTF8-Talk if you have any questions, difficulties, or suggestions for improvements. "; $fmt = array($PageStartFmt, "
", $markup, "
", $PageEndFmt); session_write_close(); PrintFmt($pagename, $fmt); exit; } function cb_markintl($m) { $bytes = $m[0]; $encoded = ''; for($i=0; $i$i) { if(!is_dir($dir)) $newdirs[] = $dir; } return array($files, $newdirs, $words); } function preparedir($base, $oldcharset) { if(! is_dir($base)) return array(array(), array(), array()); $dlen = strlen($base); $files = getdir_recursive($base, $oldcharset); $words = intlwords(array_keys($files), $oldcharset); $dirs = array(); foreach($files as $path) { $dir = preg_replace('!/[^/]+$!', '', $path); $dirs[$dir] = 1; } return array($files, $dirs, $words); } function intlwords($a, $oldcharset) { $words = []; foreach($a as $line) { $x = preg_split('![-/., ]!', $line); foreach($x as $w) { if(@$words[$w]) continue; if(preg_match('/[^-a-zA-Z0-9]/', $w)) $words[$w] = pm_recode($w, $oldcharset, 'UTF-8'); } } return $words; } function getdir_recursive($dir, $oldcharset) { global $PageIndexFile, $MigrateUTF8; $newdir = preg_replace('!^[^/]+!', "$0{$MigrateUTF8['TempSuffux']}", $dir); $out = []; $dirp = @opendir($dir); while (($file=readdir($dirp)) !== false) { if ($file=='.' || $file=='..') continue; if (substr($file, -6) == ',cache') continue; $path = "$dir/$file"; if($PageIndexFile == $path) continue; if (is_dir($path)) $out = array_merge($out, getdir_recursive($path, $oldcharset)); else { $newpath = pm_recode("$newdir/$file", $oldcharset, 'UTF-8'); if(!file_exists($newpath)) $out[$path] = $newpath; } } closedir($dirp); return $out; } # This class can actually recode an 8-bit PageStore directory on the fly, # however the installation and config.php modifications are more complex # than the current MigrateUTF8 one-time operation. Also, it doesn't help # for uploads with international characters in their paths and filenames. 
# PageStore variant whose page file names are kept in the 8-bit charset
# named by the global $Charset8bit, while callers work with UTF-8 names.
# Storage is read-only: any attempt to write a page aborts.
class PageStore8bit extends PageStore
{
    # pagefile_encode: recode the file name $f through pm_recode() with
    # 'UTF-8' as the source charset and $Charset8bit as the target.
    public function PFE($f)
    {
        global $Charset8bit;

        $encodedName = pm_recode($f, 'UTF-8', $Charset8bit);

        return $encodedName;
    }

    # pagefile_decode: the inverse of PFE(); recode the file name $f through
    # pm_recode() with $Charset8bit as the source charset and 'UTF-8' as the
    # target.
    public function PFD($f)
    {
        global $Charset8bit;

        $decodedName = pm_recode($f, $Charset8bit, 'UTF-8');

        return $decodedName;
    }

    # Writing is not supported by this store (it is not expected to be
    # called); the request is handed to Abort() and its result is returned.
    public function write($pagename, $page)
    {
        $message = '?Read only page storage';

        return Abort($message);
    }
}