<?php

/***********************************************************
* pmfeed.php, requires PmWiki 2.1                          *
* This is an RSS feed display cookbook recipe for PmWiki.  *
*                                                          *
* Uses (and includes) lastRSS which is a GPL'd php class   *
* for handling RSS feeds.                                  *
* See "copyright" after this boiler plate below...         *
*                                                          *
* Copyright (c) 2006, Chris Cox ccox@airmail.net           *
* All Rights, Reserved.                                    *
*                                                          *
* This program is free software; you can redistribute it   *
* and/or modify it under the terms of the GNU General      *
* Public License as published by the Free Software         *
* Foundation; either version 2 of the License, or (at your *
* option) any later version.                               * 
*                                                          *
* Installation:                                            *
*                                                          *
* Place pmfeed.php (this file) into your cookbook          *
* directory (e.g. /srv/www/htdocs/pmwiki/cookbook)         *
*                                                          *
* Include the cookbook in your local/config.php            *
* include_once('cookbook/pmfeed.php');                     *
*                                                          *
* Create a cache directory at pub/pmfeed under your        *
* PmWiki base directory.  It must be writable by the       *
* web server user.                                         *
*                                                          *
* On a page include the markup with a feed url.            *
* (:pmfeed feed='http://www.digg.com/rss/index.xml':)      *
*                                                          *
*                                                          *
-<0.01: Initial release.

-<0.02: Attempt to interpret some HTML tags as PmWiki equivalents.  Enabled for now.

-<0.03: Renamed banner to title and added several "show" options and added \
an unsafe option for some flexibility/risk.

-<0.04: Fixed bug regarding use of newwin=false resulting in bad output (space problem).


->Variables:
     [@
     feed=              URL to RSS feed file. Defaults to pmwiki.org's
                           Site/AllRecentChanges.
     cache_time=        Time to cache data... be nice to the providers.
                           Defaults to 2000 seconds.
     max_count=         Maximum number of items to show.  Defaults to 0 (no limit).
     newwin=            Open links to items in a new window. Defaults to true.
     overrides=         Set this to false to prevent URL line GET overrides to
                           these parameters.
     showitems=         Defaults to true.  If false, don't show RSS items.
     showitemdescr=     Defaults to true.  If false, don't include the description along with
                           the item.
     showtitle=         Defaults to true.  If false, don't show the feed title.
     title=             Alternate title instead of using RSS title.
     unsafe=            Defaults to false.  If true, allows setting of title and feed
                           from the URL line (_GET).
     @]
***********************************************************/
/*
 ======================================================================
 lastRSS 0.9.1
 
 Simple yet powerful PHP class to parse RSS files.
 
 by Vojtech Semecky, webmaster @ webdot . cz
 
 Latest version, features, manual and examples:
 	http://lastrss.webdot.cz/

 ----------------------------------------------------------------------
 LICENSE

 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License (GPL)
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 GNU General Public License for more details.

 To read the license please visit http://www.gnu.org/copyleft/gpl.html
 ======================================================================
*/

/**
* lastRSS
* Simple yet powerful PHP class to parse RSS files.
*/
class lastRSS {
	// -------------------------------------------------------------------
	// Public properties
	// -------------------------------------------------------------------
	// Encoding assumed for the feed when its XML prolog declares none.
	var $default_cp = 'UTF-8';
	// CDATA handling: 'content' unwraps the section and keeps its text,
	// 'strip' removes the whole section, any other value leaves it as is.
	var $CDATA = 'content';
	// Target encoding for parsed fields; '' disables conversion.
	var $cp = '';
	// Maximum number of items to parse; 0 means no limit.
	var $items_limit = 0;
	// When true, HTML is stripped from item titles and descriptions.
	var $stripHTML = False;
	// date() format applied to lastBuildDate/pubDate; '' leaves them as is.
	var $date_format = '';
	// Directory holding cache files; '' disables caching.
	var $cache_dir = '';
	// Maximum age of a cache file, in seconds.
	var $cache_time = 0;

	// -------------------------------------------------------------------
	// Private variables
	// -------------------------------------------------------------------
	// Source encoding of the feed being parsed (set by Parse()).
	var $rsscp = '';
	var $channeltags = array ('title', 'link', 'description', 'language',
		'copyright', 'managingEditor', 'webMaster', 'lastBuildDate',
		'rating', 'docs');
	var $itemtags = array('title', 'link', 'description', 'author',
		'category', 'comments', 'enclosure', 'guid', 'pubDate', 'source');
	var $imagetags = array('title', 'url', 'link', 'width', 'height');
	var $textinputtags = array('title', 'description', 'name', 'link');

	// -------------------------------------------------------------------
	// Parse RSS file and returns associative array.
	//
	// Returns the parsed feed with an extra 'cached' key (1 when the data
	// came from the cache, 0 when it was freshly parsed), or False when the
	// feed could not be loaded and no usable cached copy exists.
	// -------------------------------------------------------------------
	function Get ($rss_url) {
		// If CACHE DISABLED >> load and parse the file directly
		if ($this->cache_dir == '') {
			$result = $this->Parse($rss_url);
			if ($result) $result['cached'] = 0;
			return $result;
		}

		$cache_file = $this->cache_dir . '/rsscache_' . md5($rss_url);
		$mtime = @filemtime($cache_file);

		// Cached file is fresh enough: return whatever it holds.  A cached
		// failure (False) is returned too, so a dead feed is not re-fetched
		// on every request.
		if ($mtime !== false && (time() - $mtime) < $this->cache_time) {
			$result = $this->readCache($cache_file);
			if ($result) $result['cached'] = 1;
			return $result;
		}

		// Cached file is missing or too old: fetch again.
		$result = $this->Parse($rss_url);

		if (!$result && $mtime !== false) {
			// The fetch failed but an older copy exists.  Keep serving it
			// instead of overwriting good data with a failure, and refresh
			// its timestamp so the next retry waits for cache_time.
			$stale = $this->readCache($cache_file);
			if ($stale) {
				@touch($cache_file);
				$stale['cached'] = 1;
				return $stale;
			}
		}

		// Best effort: an unwritable cache directory must not break display.
		@file_put_contents($cache_file, serialize($result), LOCK_EX);
		if ($result) $result['cached'] = 0;
		return $result;
	}

	// -------------------------------------------------------------------
	// Private: load a cache file written by Get().  Returns the cached
	// array, or False when the file is unreadable, corrupt, or holds a
	// cached failure.
	// -------------------------------------------------------------------
	function readCache ($cache_file) {
		$raw = @file_get_contents($cache_file);
		if ($raw === false || $raw === '') return false;
		if (version_compare(PHP_VERSION, '7.0.0', '>=')) {
			// The cache only ever contains arrays of strings; refuse to
			// instantiate objects from a tampered file.
			$data = @unserialize($raw, array('allowed_classes' => false));
		} else {
			$data = @unserialize($raw);
		}
		return is_array($data) ? $data : false;
	}

	// -------------------------------------------------------------------
	// Modification of preg_match(); return trimed field with index 1
	// from 'classic' preg_match() array output, or '' when nothing matched.
	// -------------------------------------------------------------------
	function my_preg_match ($pattern, $subject) {
		if (!preg_match($pattern, (string)$subject, $out) || !isset($out[1])) {
			// if there is NO result, return empty string
			return '';
		}
		$value = $out[1];

		// Process CDATA (if present)
		if ($this->CDATA == 'content') {
			// Get CDATA content (without CDATA tag)
			$value = strtr($value, array('<![CDATA['=>'', ']]>'=>''));
		} elseif ($this->CDATA == 'strip') {
			// Strip CDATA: drop the sections together with their content
			$value = preg_replace('/<!\[CDATA\[.*?\]\]>/s', '', $value);
		}

		// If code page is set convert character encoding to required
		if ($this->cp != '') {
			$source_cp = ($this->rsscp != '') ? $this->rsscp : $this->default_cp;
			$converted = @iconv($source_cp, $this->cp.'//TRANSLIT', $value);
			// iconv() returns false on failure; keep the unconverted text
			// rather than losing the field.
			if ($converted !== false) $value = $converted;
		}
		return trim($value);
	}

	// -------------------------------------------------------------------
	// Replace HTML entities &something; by real characters
	// -------------------------------------------------------------------
	function unhtmlentities ($string) {
		// Get HTML entities table and flip keys<==>values
		$trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_QUOTES));
		// Add support for &apos; entity (missing in HTML_ENTITIES)
		$trans_tbl += array('&apos;' => "'");
		// Replace entities by values
		return strtr ($string, $trans_tbl);
	}

	// -------------------------------------------------------------------
	// Private: reformat a feed date with $this->date_format.  Dates that
	// strtotime() cannot parse are returned unchanged.
	// -------------------------------------------------------------------
	function formatDate ($value) {
		$timestamp = strtotime($value);
		// strtotime() reports failure as false (as -1 before PHP 5.1).
		if ($timestamp === false || $timestamp === -1) return $value;
		return date($this->date_format, $timestamp);
	}

	// -------------------------------------------------------------------
	// Private: collect the non-empty values of the given tags found in
	// $xml into $result, using $prefix.$tag as key.
	// -------------------------------------------------------------------
	function collectTags ($tags, $xml, $prefix, &$result) {
		foreach ($tags as $tag) {
			$temp = $this->my_preg_match("'<$tag.*?>(.*?)</$tag>'si", $xml);
			if ($temp != '') $result[$prefix.$tag] = $temp; // Set only if not empty
		}
	}

	// -------------------------------------------------------------------
	// Parse() is private method used by Get() to load and parse RSS file.
	// Don't use Parse() in your scripts - use Get($rss_file) instead.
	// Returns the parsed array, or False when the file cannot be opened.
	// -------------------------------------------------------------------
	function Parse ($rss_url) {
		// Open and load RSS file
		$rss_content = @file_get_contents($rss_url);
		if ($rss_content === false) {
			// Error in opening return False
			return False;
		}

		$result = array();

		// Parse document encoding.  my_preg_match() converts from rsscp, so
		// give it a sane value before the real one is known.
		$this->rsscp = $this->default_cp;
		$result['encoding'] = $this->my_preg_match("'encoding=[\'\"](.*?)[\'\"]'si",
			$rss_content);
		// if document codepage is specified, use it
		if ($result['encoding'] != '') {
			$this->rsscp = $result['encoding'];
		}

		// Parse CHANNEL info (a feed without <channel> yields no channel tags)
		$channel = preg_match("'<channel.*?>(.*?)</channel>'si", $rss_content, $out_channel)
			? $out_channel[1] : '';
		$this->collectTags($this->channeltags, $channel, '', $result);
		// If date_format is specified and lastBuildDate is valid
		if ($this->date_format != '' && isset($result['lastBuildDate'])) {
			$result['lastBuildDate'] = $this->formatDate($result['lastBuildDate']);
		}

		// Parse TEXTINPUT info
		// This a little strange regexp means:
		// Look for tag <textinput> with or without any attributes,
		// but skip truncated version <textinput /> (it's not beginning tag)
		preg_match("'<textinput(|[^>]*[^/])>(.*?)</textinput>'si", $rss_content, $out_textinfo);
		if (isset($out_textinfo[2])) {
			$this->collectTags($this->textinputtags, $out_textinfo[2], 'textinput_', $result);
		}

		// Parse IMAGE info
		preg_match("'<image.*?>(.*?)</image>'si", $rss_content, $out_imageinfo);
		if (isset($out_imageinfo[1])) {
			$this->collectTags($this->imagetags, $out_imageinfo[1], 'image_', $result);
		}

		// Parse ITEMS
		preg_match_all("'<item(| .*?)>(.*?)</item>'si", $rss_content, $items);
		$i = 0;
		$result['items'] = array(); // create array even if there are no items
		foreach ($items[2] as $rss_item) {
			// Stop as soon as the limit is reached (0 means no limit)
			if ($this->items_limit != 0 && $i >= $this->items_limit) break;

			$item = array();
			$this->collectTags($this->itemtags, $rss_item, '', $item);

			// Strip HTML tags and other stuff from DESCRIPTION and TITLE
			if ($this->stripHTML) {
				foreach (array('description', 'title') as $field) {
					if (!empty($item[$field])) {
						$item[$field] = strip_tags($this->unhtmlentities(strip_tags($item[$field])));
					}
				}
			}
			// If date_format is specified and pubDate is valid
			if ($this->date_format != '' && isset($item['pubDate'])) {
				$item['pubDate'] = $this->formatDate($item['pubDate']);
			}

			// An item without any recognised tag gets no entry, but still
			// takes up an index and counts towards items_count.
			if ($item) $result['items'][$i] = $item;
			// Item counter
			$i++;
		}

		$result['items_count'] = $i;
		return $result;
	}
}
/**
* End lastRSS
*/

/* pmfeed */
/**
 * Markup callback for the (:pmfeed ...:) directive.
 *
 * @param array $m Regex matches; $m[1] holds the directive's argument string.
 * @return string PmWiki markup produced by pmfeed().
 */
function pmfeedMarkupCallback($m) {
	return pmfeed($m[1]);
}

// The PCRE /e (eval) modifier was deprecated in PHP 5.5 and removed in
// PHP 7, so the directive is registered with a callback whenever the
// PmWiki core supports one.  The presence of Markup_e() is used as the
// indicator for such a core.  Older cores keep the legacy /e registration.
if (function_exists('Markup_e')) {
	Markup('pmfeed', 'directives', '/^\(:pmfeed[ \t]*(.*?):\)\s*$/',
		'pmfeedMarkupCallback');
} else {
	Markup('pmfeed','directives',"/^\(:pmfeed[ \t]*(.*?):\)\s*$/e",
		"pmfeed('$1')");
}

/**
 * Decode HTML entities in feed text.
 *
 * Numeric entities (decimal and hexadecimal) up to 255 are turned into the
 * corresponding single byte.  Larger code points cannot be expressed as one
 * byte, so those entities are left untouched instead of being wrapped into
 * an unrelated control character.  Named entities are decoded afterwards
 * with PHP's HTML_ENTITIES translation table.
 *
 * @param string $string Text possibly containing HTML entities.
 * @return string The decoded text.
 */
function pmfeedunhtmlentities($string) {
	// preg_replace_callback() replaces the former /e (eval) patterns, which
	// no longer work since PHP 7.
	$decode = function ($code, $entity) {
		return ($code >= 0 && $code <= 255) ? chr((int)$code) : $entity;
	};

	// replace numeric entities
	$string = preg_replace_callback('~&#x([0-9a-f]+);~i',
		function ($m) use ($decode) { return $decode(hexdec($m[1]), $m[0]); },
		(string)$string);
	$string = preg_replace_callback('~&#([0-9]+);~',
		function ($m) use ($decode) { return $decode((float)$m[1], $m[0]); },
		$string);

	// replace literal entities
	$trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));
	return strtr($string, $trans_tbl);
}

/**
 * Return a directive option, letting a URL parameter of the same name
 * override the value given in the markup.
 *
 * @param string $key   Option name.
 * @param array  $args  Options from the markup merged with the defaults.
 * @param array  $query URL parameters allowed to override (may be empty).
 * @return mixed The effective option value.
 */
function pmfeedOption($key, $args, $query) {
	// Only plain strings are accepted; ?key[]=x style arrays are ignored.
	if (isset($query[$key]) && is_string($query[$key])) return $query[$key];
	return $args[$key];
}

/**
 * Hack (Axel) to convert UTF-8 encoded German umlauts and sharp s back to
 * their single-byte form.
 *
 * NOTE(review): the replacement bytes are assumed to be ISO-8859-1; they
 * were not readable in the reviewed copy of this file -- confirm.
 * The conversion is skipped when PmWiki's $Charset says the wiki itself is
 * UTF-8, because it would then corrupt otherwise valid text.
 *
 * @param string $text Markup text.
 * @return string Text with the umlauts converted where appropriate.
 */
function pmfeedFixUmlauts($text) {
	global $Charset;
	if (isset($Charset) && strtoupper($Charset) == 'UTF-8') return $text;
	return strtr($text, array(
		"\xC3\xBC" => "\xFC", // u umlaut
		"\xC3\x9C" => "\xDC", // U umlaut (was missing from the original list)
		"\xC3\x84" => "\xC4", // A umlaut
		"\xC3\xA4" => "\xE4", // a umlaut
		"\xC3\x96" => "\xD6", // O umlaut
		"\xC3\xB6" => "\xF6", // o umlaut
		"\xC3\x9F" => "\xDF", // sharp s
	));
}

/**
 * Convert the HTML description of a feed item into a PmWiki block.
 *
 * A few HTML tags are translated into PmWiki equivalents, the remaining
 * tags are stripped, and the text is wrapped in an indented block.
 *
 * @param string $descr    Raw item description from the feed.
 * @param string $pmnewwin 'newwin ' or '' (wiki style for links).
 * @return string PmWiki markup ending with a newline.
 */
function pmfeedDescrToMarkup($descr, $pmnewwin) {
	global $PmFeedDescrBold, $PmFeedDescrItalic, $PmFeedDescrUnderline,
		$PmFeedDescrSmall, $PmFeedMarkupReplace;

	$d = pmfeedunhtmlentities($descr);

	// Get rid of initial para or br
	$d = preg_replace('/^<p>/i', '', $d);
	$d = preg_replace('/^<[bh]r \/>/i', '', $d);

	// Attempt to convert <a href="url">name</a> to [[url|name]]
	$d = preg_replace('/<a\s+.*?href="([^"]+)"[^>]*>([^<]+)<\/a>/is', '[[$1|$2]]', $d);

	// Convert bold, italics, underline and small tags.
	// (Conversion of <strong> via $PmFeedDescrStrong is intentionally left
	// disabled, as it was in the original code.)
	$d = preg_replace('/<b>(.*?)<\/b>/', $PmFeedDescrBold, $d);
	$d = preg_replace('/<i>(.*?)<\/i>/', $PmFeedDescrItalic, $d);
	$d = preg_replace('/<u>(.*?)<\/u>/', $PmFeedDescrUnderline, $d);
	$d = preg_replace('/<small>(.*?)<\/small>/', $PmFeedDescrSmall, $d);

	// Convert breaks and paragraphs to a PmWiki forced line break.  After
	// preg replacement-string unescaping this is two backslashes + newline.
	$lineBreak = '\\\\\\\\'."\n";
	$d = preg_replace('/<hr \/>/', $lineBreak, $d);
	// The former pattern '/<br \>/' only matched the literal "<br >", so
	// real <br>, <br/> and <br /> tags were never converted.
	$d = preg_replace('/<br\s*\/?>/i', $lineBreak, $d);
	$d = preg_replace('/<p>/', $lineBreak, $d);

	// Hack to hide PmWiki Markup, pound and asterisk
	// ... sigh, there could be a ton of these.
	$d = preg_replace($PmFeedMarkupReplace, '[=$1=]', $d);

	// Slaughter the rest of the html tags
	$d = "%block class=indent ".$pmnewwin."%".ltrim(strip_tags($d))."%%\n";
	$d = preg_replace('/^\s+/', '', $d);

	return pmfeedFixUmlauts($d);
}

/**
 * Render the (:pmfeed ...:) directive: fetch an RSS feed through lastRSS
 * and return it as PmWiki markup (feed title, item links, descriptions).
 *
 * Options come from the directive's arguments.  Unless overrides=false,
 * display options may be overridden by URL parameters.  feed and title can
 * only be overridden when unsafe=true.
 *
 * @param string $opts Argument string of the directive.
 * @return string PmWiki markup; PRR() is called so PmWiki re-processes it.
 */
function pmfeed($opts) {
	global $FarmD, $PmFeedCacheDir,
		$PmFeedTitleMark, $PmFeedItemMark, $PmFeedDescrBold,
		$PmFeedDescrItalic, $PmFeedDescrUnderline, $PmFeedDescrSmall,
		$PmFeedDescrStrong, $PmFeedMarkupReplace;

	SDV($PmFeedCacheDir, $FarmD.'/pub/pmfeed');
	//Initial PmWiki markup for feed title
	SDV($PmFeedTitleMark, '!!');
	//Initial PmWiki markup for items
	SDV($PmFeedItemMark, '*');
	//Convert bold tags
	SDV($PmFeedDescrBold, '\'\'\'$1\'\'\'');
	//Convert italic tags
	SDV($PmFeedDescrItalic, '\'\'$1\'\'');
	//Convert underline tags
	SDV($PmFeedDescrUnderline, '{+$1+}');
	//Convert small tags
	SDV($PmFeedDescrSmall, '[-$1-]');
	//Convert strong tags: <strong>text</strong> to '''''text'''''
	SDV($PmFeedDescrStrong, '\'\'\'\'\'$1\'\'\'\'\'');
	//Attempt to escape markups special to PmWiki that might be found in description
	SDV($PmFeedMarkupReplace, '/([#*])/');

	// Process markup arguments first
	//
	$defaults = array(
		'feed'=>'http://www.pmwiki.org/wiki/Site/AllRecentChanges?action=rss',
		'title'=>'',
		'cache_time'=>2000,
		'max_count'=>0,
		'showitems'=>'true',
		'showitemdescr'=>'true',
		'showtitle'=>'true',
		'showfeedxmllink'=>'false',
		'newwin'=>'true',
		'overrides'=>'true',
		'unsafe'=>'false'
	);
	$args = array_merge($defaults, ParseArgs($opts));

	// Allows overrides=false in the :pmfeed: markup to disallow
	// settings on the URL line.  A local copy is used so that $_GET itself
	// stays intact for the rest of the request.
	$query = ($args['overrides'] == 'false') ? array() : (array)$_GET;

	$cache_time = (int)pmfeedOption('cache_time', $args, $query);
	$max_count = (int)pmfeedOption('max_count', $args, $query);
	$showitemdescr = pmfeedOption('showitemdescr', $args, $query);
	$showitems = pmfeedOption('showitems', $args, $query);
	$showtitle = pmfeedOption('showtitle', $args, $query);
	$showfeedxmllink = pmfeedOption('showfeedxmllink', $args, $query);
	$newwin = pmfeedOption('newwin', $args, $query);

	// For unsafe (?) things
	// Enabling unsafe would allow you to create a totally user driven
	// feed.  It's possible that some "nasty" markup could come in via
	// title if made user modifiable.
	$title = $args['title'];
	$feed = $args['feed'];
	if ($args['unsafe'] == 'true') {
		$title = pmfeedOption('title', $args, $query);
		$urlFeed = pmfeedOption('feed', $args, $query);
		// A feed named on the URL line must be a web address; otherwise a
		// visitor could make the server read local files or other stream
		// wrappers.  Anything else falls back to the directive's feed.
		if ($urlFeed === $args['feed'] || preg_match('!^https?://!i', $urlFeed)) {
			$feed = $urlFeed;
		}
	}

	// Create lastRSS object
	$rss = new lastRSS;

	// Set cache dir and cache time limit
	// (don't forget to chmod cache to allow writing)
	$rss->cache_dir = $PmFeedCacheDir;
	$rss->cache_time = $cache_time;

	$out = "\n";
	$pmnewwin = ($newwin == 'true') ? 'newwin ' : '';

	// Try to load and parse RSS file
	$feed = str_replace('&amp;', '&', $feed);
	$rs = $rss->Get($feed);
	if (!$rs) {
		$out .= "Error: It's not possible to reach RSS file $feed ...\n";
		PRR(); return $out;
	}

	// Show title or clickable website rss title if not supplied
	if ($showtitle == 'true') {
		$rsLink = isset($rs['link']) ? $rs['link'] : '';
		$rsTitle = isset($rs['title']) ? $rs['title'] : '';
		if ($title != '') {
			// The feed's own title becomes the link tooltip
			$out .= "\n$PmFeedTitleMark %".$pmnewwin."class=pmfeedtitle%[[$rsLink|$title\"$rsTitle\"]]";
		} else {
			$out .= "\n$PmFeedTitleMark %".$pmnewwin."class=pmfeedtitle%[[$rsLink|$rsTitle]]";
		}
	}
	// Umlaut hack, applied to the feed name
	$out = pmfeedFixUmlauts($out);

	// Display a link to the original feed xml
	if ($showfeedxmllink == 'true') {
		$out .= "[[$feed|(XML)]]";
	}
	$out .= "\n";

	// Show last published articles (title, link, description)
	if ($showitems == 'true') {
		$items = (isset($rs['items']) && is_array($rs['items'])) ? $rs['items'] : array();
		$shown = 0;
		foreach ($items as $item) {
			if ($max_count && $shown >= $max_count) {
				break;
			}
			// lastRSS only sets the fields a feed item actually has
			$rawTitle = isset($item['title']) ? $item['title'] : '';
			$pmfeedlink = isset($item['link']) ? $item['link'] : '';
			$pmfeedtitle = preg_replace('/\n/', '', strip_tags(pmfeedunhtmlentities($rawTitle)));

			if ($pmfeedlink == '') {
				$pmfeeditem = "$PmFeedItemMark %class=pmfeeditem%".$pmfeedtitle;
			} else {
				$pmfeeditem = "$PmFeedItemMark %".$pmnewwin."class=pmfeeditem%[[$pmfeedlink|$pmfeedtitle]]";
			}
			// Umlaut hack, applied to the news headers
			$out .= pmfeedFixUmlauts($pmfeeditem)."\n";

			if ($showitemdescr == 'true') {
				$rawDescr = isset($item['description']) ? $item['description'] : '';
				$out .= pmfeedDescrToMarkup($rawDescr, $pmnewwin);
			}
			$shown++;
		}
	}

	PRR(); return $out;
}