mirror of
				https://github.com/e107inc/e107.git
				synced 2025-10-27 03:31:47 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			476 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			476 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /*
 | |
|  * e107 website system
 | |
|  *
 | |
|  * Copyright (C) 2008-2021 e107 Inc (e107.org)
 | |
|  * Released under the terms and conditions of the
 | |
|  * GNU General Public License (http://www.gnu.org/licenses/gpl.txt)
 | |
|  *
 | |
|  */
 | |
| 
 | |
| 
 | |
| 
 | |
| if (!defined('e107_INIT')) { exit; }
 | |
| 
 | |
| 
 | |
| class linkwords_parse
 | |
| {
 | |
| 	protected $lw_enabled = FALSE;		// Default to disabled to start
 | |
| 	protected $lwAjaxEnabled = FALSE;		// Adds in Ajax-compatible links
 | |
| 	protected $utfMode	= '';			// Flag to enable utf-8 on regex
 | |
| 	protected $cache = true;
 | |
| 	protected $suppressSamePageLink = false;
 | |
| 	protected $hash;
 | |
| 	protected $admin = false;
 | |
| 
 | |
| 	protected $word_list 	= array();		// List of link words/phrases
 | |
| 	private $link_list	= array();		// Corresponding list of links to apply
 | |
| 	private $ext_list	= array();		// Flags to determine 'open in new window' for link
 | |
| 	private $tip_list 	= array();		// Store for tooltips
 | |
| 	private $rel_list   = array();
 | |
| 	private $LinkID		= array();		// Unique ID for each linkword
 | |
| 	private $area_opts;		// Process flags for the various contexts
 | |
| 	private $block_list;		// Array of 'blocked' pages
 | |
| 
 | |
| 	protected $word_class = array();
 | |
| 
 | |
| 	protected $customClass  = '';
 | |
| 	protected $wordCount    = array();
 | |
| 	protected $intLinks = 0;
 | |
| 	protected $extLinks = 0;
 | |
| 	protected $word_limit   = array();
 | |
| 
 | |
| 	const LW_CACHE_TAG = 'linkwords';
 | |
| //	protected $maxPerWord   = 3;
 | |
| 
 | |
| 
 | |
| 	/* constructor */
 | |
| 	function __construct()
 | |
| 	{
 | |
| 
 | |
| 		// See whether they should be active on this page - if not, no point doing anything!
 | |
| 		if(e_ADMIN_AREA === true && empty($_POST['runLinkwordTest']))
 | |
| 		{
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		$this->init();
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 	public function enable()
 | |
| 	{
 | |
| 		$this->lw_enabled = true;
 | |
| 	}
 | |
| 
 | |
| 	public function cache($var)
 | |
| 	{
 | |
| 		$this->cache = (bool) $var;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	public function setWordData($arr = array())
 | |
| 	{
 | |
| 		foreach($arr as $val)
 | |
| 		{
 | |
| 			$this->word_list[]  = $val['word'];
 | |
| 			$this->link_list[]  = varset($val['link']);
 | |
| 			$this->ext_list[]   = varset($val['ext']);
 | |
| 			$this->tip_list[]   = varset($val['tip']);
 | |
| 			$this->word_limit[] = varset($val['limit']);
 | |
| 			$this->LinkID[]     = varset($val['id']);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	public function setAreaOpts($arr = array())
 | |
| 	{
 | |
| 		$this->area_opts = $arr;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	public function setLink($arr)
 | |
| 	{
 | |
| 		$this->word_list = $arr;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 
 | |
| 	private function loadRow($lw, $row)
 | |
| 	{
 | |
| 		$lw = trim($lw);
 | |
| 
 | |
| 		if(empty($lw))
 | |
| 		{
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		$frm = e107::getForm();
 | |
| 
 | |
| 		$this->word_list[]      = $lw;
 | |
| 		$this->word_class[]     = 'lw-'.$frm->name2id($lw);
 | |
| 		$this->word_limit[]     = vartrue($row['linkword_limit'],3);
 | |
| 		$this->link_list[]      = $row['linkword_link'];
 | |
| 		$this->tip_list[]       = $row['linkword_tooltip'];
 | |
| 		$this->ext_list[]       = $row['linkword_newwindow'];
 | |
| 		$this->rel_list[]       = str_replace(",", ' ', $row['linkword_rel']);
 | |
| 		$this->LinkID[]         = max($row['linkword_tip_id'], $row['linkword_id']);		// If no specific ID defined, use the DB record ID
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	public function toHTML($text,$area = 'olddefault')
 | |
| 	{
 | |
| 
 | |
| 		if(is_string($this->area_opts))
 | |
| 		{
 | |
| 			$this->area_opts = e107::unserialize($this->area_opts);
 | |
| 		}
 | |
| 
 | |
| 		if($this->area_opts === null)
 | |
| 		{
 | |
| 			$this->area_opts = array();
 | |
| 		}
 | |
| 
 | |
| 		if (!$this->lw_enabled || empty($this->area_opts) || !isset($this->area_opts[$area]))
 | |
| 		{
 | |
| 			// e107::getDebug()->log("Link words skipped on ".substr($text, 0, 50));
 | |
| 		    return $text;		// No linkwords in disabled areas
 | |
| 		}
 | |
| 
 | |
| 		// Split up by HTML tags and process the odd bits here
 | |
| 		$ptext = "";
 | |
| 		$lflag = false;
 | |
| 		$cflag = false; // commented code prsent.
 | |
| 
 | |
| 		// Shouldn't need utf-8 on next line - just looking for HTML tags
 | |
| 		$content = preg_split('#(<.*?>)#mis', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 | |
| 
 | |
| 		$range = range(1,5);
 | |
| 
 | |
| 		foreach($content as $cont)
 | |
| 		{
 | |
| 			if(strpos($cont, '-->') !== false)
 | |
| 			{
 | |
| 				$cflag = false; // end of commented code
 | |
| 				$ptext .= $cont;
 | |
| 				continue;
 | |
| 			}
 | |
| 
 | |
| 			if ($cont[0] === "<")  // Its some HTML
 | |
| 			{
 | |
| 				$ptext .= $cont;
 | |
| 				if (strpos($cont, "<!--") === 0) $cflag = true; // start of commented code.
 | |
| 				if (strpos($cont, "<a") === 0) $lflag = true; // start of link
 | |
| 				if (strpos($cont, "</a") === 0) $lflag = false; // end of link.
 | |
| 
 | |
| 				if($area === 'BODY' && !isset($this->area_opts['TITLE'])) // disable linking on header tag content unless enabled in prefs.
 | |
| 				{
 | |
| 					// loop thru <h1>, <h2> etc.
 | |
| 					foreach($range as $c)
 | |
| 					{
 | |
| 						$hOpenTag = '<h'.$c;
 | |
| 						$hCloseTag = '</h'.$c;
 | |
| 						if (strpos($cont, $hOpenTag) === 0) $lflag = true;
 | |
| 						if (strpos($cont, $hCloseTag) === 0) $lflag = false;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 			}
 | |
| 			else   // Its the text in between
 | |
| 			{
 | |
| 				if ($lflag || $cflag) // Its probably within a link - leave unchanged
 | |
| 				{
 | |
| 					$ptext .= $cont;
 | |
| 				}
 | |
| 				else
 | |
| 				{
 | |
| 					if (trim($cont))
 | |
| 					{
 | |
| 						// Some non-white space - worth word matching
 | |
| 						$ptext .= $this->linksproc($cont,0,count($this->word_list));
 | |
| 						// echo "Check linkwords: ".count($this->word_list).'<br />';
 | |
| 					}
 | |
| 					else
 | |
| 					{
 | |
| 						$ptext .= $cont;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 	//	print_a($this->wordCount);
 | |
| 		return $ptext;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	/**
 | |
| 	 * This function is called recursively - it splits the text up into blocks - some containing a particular linkword
 | |
| 	 * @param $text
 | |
| 	 * @param $first
 | |
| 	 * @param $limit
 | |
| 	 * @return string
 | |
| 	 */
 | |
| 	function linksproc($text,$first,$limit)
 | |
| 	{
 | |
| 		$tp = e107::getParser();
 | |
| 
 | |
| 		for (; $first < $limit; $first ++)
 | |
| 		{
 | |
| 			if (empty($this->word_list[$first])) continue;
 | |
| 			if (strpos($tp->ustrtolower($text), $tp->ustrtolower($this->word_list[$first])) !== false) break;
 | |
| 		}
 | |
| 
 | |
| 		if ($first == $limit)
 | |
| 		{
 | |
| 			 return $text;		// Return if no linkword found
 | |
| 		}
 | |
| 
 | |
| 		// There's at least one occurrence of the linkword in the text
 | |
| 		// Prepare all info once only
 | |
| 		// If supporting Ajax, use the following:
 | |
| 		// <a href='link url' rel='external linkwordId::122' class='linkword-ajax'>
 | |
| 		// linkwordId::122 is a unique ID
 | |
| 
 | |
| 		$ret = '';
 | |
| 		$linkwd = '';
 | |
| 		$linkrel = array();
 | |
| 		//		$linkwd = "href='#' ";				// Not relevant for Prototype, but needed with 'pure' JS to make tooltip stuff work - doesn't find link elements without href
 | |
| 		$lwClass  = array();
 | |
| 		$lw = $this->word_list[$first];		// This is the word we're matching - in lower case in our 'master' list
 | |
| 		$tooltip = '';
 | |
| 
 | |
| 		if ($this->tip_list[$first])
 | |
| 		{	// Got tooltip
 | |
| 			if ($this->lwAjaxEnabled)
 | |
| 			{
 | |
| 				$linkrel[] = 'linkwordID::'.$this->LinkID[$first];
 | |
| 				$lwClass[] = 'lw-ajax '.$this->customClass;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				$tooltip = " title=\"{$this->tip_list[$first]}\" ";
 | |
| 				$lwClass[] = 'lw-tip '.$this->customClass;
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		if ($this->link_list[$first])  // Got link
 | |
| 		{
 | |
| 			$newLink = $tp->replaceConstants($this->link_list[$first], 'full');
 | |
| 
 | |
| 			if (!$this->suppressSamePageLink || (($newLink !== e_REQUEST_URL) && $newLink !== e_REQUEST_URI))
 | |
| 			{
 | |
| 				$linkwd = " href=\"".$newLink."\" ";
 | |
| 
 | |
| 				if(!empty($this->rel_list[$first]))
 | |
| 				{
 | |
| 					$linkrel[] = $this->rel_list[$first];
 | |
| 				}
 | |
| 				elseif($this->ext_list[$first]) // Determine external links
 | |
| 				{
 | |
| 					$linkrel[] = 'noopener external';
 | |
| 				}
 | |
| 
 | |
| 				$lwClass[] = 'lw-link '.$this->customClass;
 | |
| 			}
 | |
| 		}
 | |
| 		elseif(!empty($this->word_class[$first]))
 | |
| 		{
 | |
| 			$lwClass[] = $this->word_class[$first];
 | |
| 		}
 | |
| 
 | |
| 	//	if (!count($lwClass))
 | |
| 	//	{
 | |
| 			//	return $this->linksproc($sl,$first+1,$limit);		// Nothing to do - move on to next word (shouldn't really get here)
 | |
| 	//	}
 | |
| 
 | |
| 		if (count($linkrel))
 | |
| 		{
 | |
| 			$linkwd .= " rel='".implode(' ',$linkrel)."'";
 | |
| 		}
 | |
| 
 | |
| 		// This splits the text into blocks, some of which will precisely contain a linkword
 | |
| 		$split_line = preg_split('#\b('.$lw.')(\s|\b)#i'.$this->utfMode, $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );		// *utf (selected)
 | |
| 		//	$class = "".implode(' ',$lwClass)."' ";
 | |
| 
 | |
| 		$class = implode(' ',$lwClass);
 | |
| 
 | |
| 		$hash = md5($lw);
 | |
| 		$this->hash = $hash;
 | |
| 
 | |
| 		if(!isset($this->wordCount[$hash]))
 | |
| 		{
 | |
| 			$this->wordCount[$hash] = 0;
 | |
| 		}
 | |
| 
 | |
| 		foreach ($split_line as $count=>$sl)
 | |
| 		{
 | |
| 			if ($this->wordCount[$hash] < (int) $this->word_limit[$first] && ($tp->ustrtolower($sl) === $tp->ustrtolower($lw)))	// Do linkword replace		// We know the linkword is already lower case							// *utf
 | |
| 			{
 | |
| 				$this->wordCount[$hash]++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 				$classCount = " lw-".$this->wordCount[$hash];
 | |
| 
 | |
| 				if(empty($linkwd))
 | |
| 				{
 | |
| 					$ret .= "<span class=\"".$class.$classCount."\" ".$tooltip.">".$sl."</span>";
 | |
| 				}
 | |
| 				else
 | |
| 				{
 | |
| 					if(strpos($linkwd,'http')!==false)
 | |
| 					{
 | |
| 						$this->extLinks++;
 | |
| 					}
 | |
| 					else
 | |
| 					{
 | |
| 						$this->intLinks++;
 | |
| 					}
 | |
| 
 | |
| 					$ret .= "<a class=\"".$class.$classCount."\" ".$linkwd.$tooltip.">".$sl."</a>";
 | |
| 				}
 | |
| 
 | |
| 			}
 | |
| 			elseif (trim($sl)) // Something worthwhile left - look for more linkwords in it
 | |
| 			{
 | |
| 				$ret .= $this->linksproc($sl,$first+1,$limit);
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				$ret .= $sl;   // Probably just some white space
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		return $ret;
 | |
| 	}
 | |
| 
 | |
| 	function getStats()
 | |
| 	{
 | |
| 		return [
 | |
| 			'internal'  => $this->intLinks,
 | |
| 			'external'  => $this->extLinks,
 | |
| 		];
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 	/**
 | |
| 	 * @return void
 | |
| 	 */
 | |
| 	public function init()
 | |
| 	{
 | |
| 
 | |
| 		$tp = e107::getParser();
 | |
| 		$pref = e107::pref('linkwords');
 | |
| 		$frm = e107::getForm();
 | |
| 
 | |
| 		//	$this->maxPerWord       = vartrue($pref['lw_max_per_word'], 25);
 | |
| 		$this->customClass = vartrue($pref['lw_custom_class']);
 | |
| 		$this->area_opts = (array) varset($pref['lw_context_visibility']);
 | |
| 		$this->utfMode = (strtolower(CHARSET) === 'utf-8') ? 'u' : '';
 | |
| 		$this->lwAjaxEnabled = varset($pref['lw_ajax_enable'], 0);
 | |
| 		$this->suppressSamePageLink = (bool) vartrue($pref['lw_notsamepage'], false);
 | |
| 
 | |
| 
 | |
| 		// Now see if disabled on specific pages
 | |
| 		$check_url = e_SELF . (defined('e_QUERY') ? "?" . e_QUERY : '');
 | |
| 		$this->block_list = explode("|", substr(varset($pref['lw_page_visibility']), 2));    // Knock off the 'show/hide' flag
 | |
| 
 | |
| 		foreach($this->block_list as $p)
 | |
| 		{
 | |
| 			if($p = trim($p))
 | |
| 			{
 | |
| 				if(substr($p, -1) === '!')
 | |
| 				{
 | |
| 					$p = substr($p, 0, -1);
 | |
| 					if(substr($check_url, strlen($p) * -1) == $p)
 | |
| 					{
 | |
| 						return;
 | |
| 					}
 | |
| 				}
 | |
| 				else
 | |
| 				{
 | |
| 					if(strpos($check_url, $p) !== false)
 | |
| 					{
 | |
| 						return;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 
 | |
| 		if($this->cache && ($temp = e107::getCache()->retrieve(self::LW_CACHE_TAG, false, true, true)))
 | |
| 		{
 | |
| 			if($data = e107::unserialize($temp))
 | |
| 			{
 | |
| 				foreach($data as $key => $val)
 | |
| 				{
 | |
| 					$this->$key = $val;
 | |
| 				}
 | |
| 
 | |
| 				$this->lw_enabled = true;
 | |
| 			}
 | |
| 			else
 | |
| 			{
 | |
| 				trigger_error("Error reading linkwords cache: " . self::LW_CACHE_TAG);
 | |
| 			}
 | |
| 
 | |
| 		}
 | |
| 
 | |
| 		if(empty($temp))    // Either cache disabled, or no info in cache (or error reading/processing cache)
 | |
| 		{
 | |
| 			$link_sql = e107::getDb('link_sql');
 | |
| 
 | |
| 			if($link_sql->select("linkwords", "*", "linkword_active!=1"))
 | |
| 			{
 | |
| 				$this->lw_enabled = true;
 | |
| 
 | |
| 				while($row = $link_sql->fetch())
 | |
| 				{
 | |
| 
 | |
| 					$lw = $tp->ustrtolower($row['linkword_word']);                    // It was trimmed when saved		*utf
 | |
| 					$lw = str_replace(''', "'", $lw); // Fix for apostrophies.
 | |
| 
 | |
| 					if($row['linkword_active'] == 2)
 | |
| 					{
 | |
| 						$row['linkword_link'] = '';        // Make sure linkword disabled
 | |
| 					}
 | |
| 
 | |
| 					if($row['linkword_active'] < 2)
 | |
| 					{
 | |
| 						$row['linkword_tooltip'] = '';    // Make sure tooltip disabled
 | |
| 					}
 | |
| 
 | |
| 
 | |
| 					if(strpos($lw, ',')) // Several words to same link
 | |
| 					{
 | |
| 						$lwlist = explode(',', $lw);
 | |
| 						foreach($lwlist as $lw)
 | |
| 						{
 | |
| 							$this->loadRow($lw, $row);
 | |
| 						}
 | |
| 					}
 | |
| 					else
 | |
| 					{
 | |
| 						$this->loadRow($lw, $row);
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 				if($this->cache) // Write to file for next time
 | |
| 				{
 | |
| 					$temp = [];
 | |
| 					foreach(array('word_list', 'word_class', 'word_limit', 'link_list', 'tip_list', 'ext_list', 'rel_list', 'LinkID') as $var)
 | |
| 					{
 | |
| 						$temp[$var] = $this->$var;
 | |
| 					}
 | |
| 
 | |
| 					e107::getCache()->set(self::LW_CACHE_TAG, e107::serialize($temp, 'json'), true, true, true);
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 |