b2evolution PHP Cross Reference Blogging Systems

Source: /inc/xhtml_validator/_xhtml_validator.class.php - 340 lines - 9848 bytes - Summary - Text - Print

Description: This file implements the XHTML_Validator class. Checks HTML against a subset of elements to ensure safety and XHTML validation.

   1  <?php
   2  /**
   3   * This file implements the XHTML_Validator class.
   4   *
   5   * Checks HTML against a subset of elements to ensure safety and XHTML validation.
   6   *
   7   * This file is part of the b2evolution/evocms project - {@link http://b2evolution.net/}.
   8   * See also {@link http://sourceforge.net/projects/evocms/}.
   9   *
  10   * @copyright (c)2003-2014 by Francois Planque - {@link http://fplanque.com/}.
  11   * Parts of this file are copyright (c)2003 by Nobuo SAKIYAMA - {@link http://www.sakichan.org/}
  12   * Parts of this file are copyright (c)2004-2005 by Daniel HAHLER - {@link http://thequod.de/contact}.
  13   *
  14   * @license http://b2evolution.net/about/license.html GNU General Public License (GPL)
  15   *
  16   * {@internal Open Source relicensing agreement:
  17   * Daniel HAHLER grants Francois PLANQUE the right to license
  18   * Daniel HAHLER's contributions to this file and the b2evolution project
  19   * under any OSI approved OSS license (http://www.opensource.org/licenses/).
  20   * }}
  21   *
  22   * {@internal Origin:
  23   * This file was inspired by Simon Willison's SafeHtmlChecker released in
  24   * the public domain on 23rd Feb 2003.
  25   * {@link http://simon.incutio.com/code/php/SafeHtmlChecker.class.php.txt}
  26   * }}
  27   *
  28   * @package evocore
  29   *
  30   * {@internal Below is a list of authors who have contributed to design/coding of this file: }}
  31   * @author blueyed: Daniel HAHLER.
  32   * @author fplanque: Francois PLANQUE.
  33   * @author sakichan: Nobuo SAKIYAMA.
  34   * @author Simon Willison.
  35   *
  36   * @version $Id: _xhtml_validator.class.php 6136 2014-03-08 07:59:48Z manuel $
  37   */
  38  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
  39  
  40  
  41  /**
  42   *  Load required funcs
  43   */
  44  load_funcs('_core/_url.funcs.php');
  45  
  46  
  47  /**
  48   * XHTML_Validator
  49   *
  50   * checks HTML against a subset of elements to ensure safety and XHTML validation.
  51   *
  52   * @package evocore
  53   */
  54  class XHTML_Validator
  55  {
  56      var $tags;      // Array showing allowed attributes for tags
  57      var $tagattrs;  // Array showing URI attributes
  58      var $uri_attrs;
  59      var $allowed_uri_scheme;
  60  
  61      // Internal variables
  62      var $parser;
  63      var $stack = array();
  64      var $last_checked_pos;
  65      var $error;
  66  
  67      /**
  68       * Constructor
  69       *
  70       * {@internal This gets tested in _libs.misc.simpletest.php}}
  71       *
  72       * @param string Context
  73       * @param boolean Allow CSS tweaks?
  74       * @param boolean Allow IFrames?
  75       * @param boolean Allow Javascript?
  76       * @param boolean Allow Objects?
  77       * @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)
  78       * @param string Message type for errors
  79       */
  80  	function XHTML_Validator( $context = 'posting', $allow_css_tweaks = false, $allow_iframes = false, $allow_javascript = false, $allow_objects = false, $encoding = NULL, $msg_type = 'error' )
  81      {
  82          global $inc_path;
  83  
  84          require $inc_path.'xhtml_validator/_xhtml_dtd.inc.php';
  85  
  86          $this->context = $context;
  87  
  88          switch( $context )
  89          {
  90              case 'posting':
  91              case 'xmlrpc_posting':
  92                  $this->tags = & $allowed_tags;
  93                  $this->tagattrs = & $allowed_attributes;
  94                  break;
  95  
  96              case 'commenting':
  97                  $this->tags = & $comments_allowed_tags;
  98                  $this->tagattrs = & $comments_allowed_attributes;
  99                  break;
 100  
 101              default:
 102                  debug_die( 'unknown context: '.$context );
 103          }
 104  
 105          // Attributes that need to be checked for a valid URI:
 106          $this->uri_attrs = array
 107          (
 108              'xmlns',
 109              'profile',
 110              'href',
 111              'src',
 112              'cite',
 113              'classid',
 114              'codebase',
 115              'data',
 116              'archive',
 117              'usemap',
 118              'longdesc',
 119              'action'
 120          );
 121  
 122          $this->allowed_uri_scheme = get_allowed_uri_schemes( $context );
 123  
 124          $this->msg_type = $msg_type;
 125  
 126          if( empty($encoding) )
 127          {
 128              global $io_charset;
 129              $encoding = $io_charset;
 130          }
 131          $encoding = strtoupper($encoding); // we might get 'iso-8859-1' for example
 132          $this->encoding = $encoding;
 133          if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
 134          { // passed encoding not supported by xml_parser_create()
 135              $this->xml_parser_encoding = ''; // auto-detect (in PHP4, in PHP5 anyway)
 136          }
 137          else
 138          {
 139              $this->xml_parser_encoding = $this->encoding;
 140          }
 141          $this->parser = xml_parser_create( $this->xml_parser_encoding );
 142  
 143          $this->last_checked_pos = 0;
 144          $this->error = false;
 145  
 146          // Creates the parser
 147          xml_set_object( $this->parser, $this);
 148  
 149          // set functions to call when a start or end tag is encountered
 150          xml_set_element_handler($this->parser, 'tag_open', 'tag_close');
 151          // set function to call for the actual data
 152          xml_set_character_data_handler($this->parser, 'cdata');
 153  
 154          xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
 155      }
 156  
 157  
 158      /**
 159       * check(-)
 160       */
 161  	function check($xhtml)
 162      {
 163          // Convert encoding:
 164          // TODO: use convert_encoding()
 165          if( empty($this->xml_parser_encoding) || $this->encoding != $this->xml_parser_encoding )
 166          { // we need to convert encoding:
 167              if( function_exists( 'mb_convert_encoding' ) )
 168              { // we can convert encoding to UTF-8
 169                  $this->encoding = 'UTF-8';
 170  
 171                  // Convert XHTML:
 172                  $xhtml = mb_convert_encoding( $xhtml, 'UTF-8' );
 173              }
 174              elseif( ($this->encoding == 'ISO-8859-1' || empty($this->encoding)) && function_exists('utf8_encode') )
 175              {
 176                  $this->encoding = 'UTF-8';
 177  
 178                  $xhtml = utf8_encode( $xhtml );
 179              }
 180          }
 181  
 182          // Open comments or '<![CDATA[' are dangerous
 183          $xhtml = str_replace('<!', '', $xhtml);
 184  
 185          // Convert isolated & chars
 186          $xhtml = preg_replace( '#(\s)&(\s)#', '\\1&amp;\\2', $xhtml );
 187  
 188          $xhtml_head = '<?xml version="1.0"';
 189          if( ! empty($this->encoding) )
 190          {
 191              $xhtml_head .= ' encoding="'.$this->encoding.'"';
 192          }
 193  
 194          $xhtml_head .= '?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"';
 195  
 196          // Include entities:
 197          $xhtml_head .= '[';
 198          // Include latin1 entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent):
 199          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-lat1.ent' );
 200          // Include symbol entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent):
 201          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-symbol.ent' );
 202          // Include special entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent):
 203          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-special.ent' );
 204          $xhtml_head .= ']>';
 205  
 206          $xhtml = $xhtml_head.'<body>'.$xhtml.'</body>';
 207          unset($xhtml_head);
 208  
 209          if( !xml_parse($this->parser, $xhtml) )
 210          {
 211              $xml_error_code = xml_get_error_code( $this->parser );
 212              $xml_error_string = xml_error_string( $xml_error_code );
 213              switch( $xml_error_code )
 214              {
 215                  case XML_ERROR_TAG_MISMATCH:
 216                      $xml_error_string .= ': <code>'.$this->stack[count($this->stack)-1].'</code>';
 217                      break;
 218              }
 219              $pos = xml_get_current_byte_index($this->parser);
 220              $xml_error_string .= ' near <code>'.htmlspecialchars( evo_substr( $xhtml, $this->last_checked_pos, $pos-$this->last_checked_pos+20 ) ).'</code>';
 221  
 222              $this->html_error( T_('Parser error: ').$xml_error_string );
 223          }
 224  
 225          return $this->isOK();
 226      }
 227  
 228  
 229      /**
 230       * tag_open(-)
 231       *
 232       * Called when the parser finds an opening tag
 233       */
 234  	function tag_open($parser, $tag, $attrs)
 235      {
 236          global $debug;
 237  
 238          // echo "processing tag: $tag <br />\n";
 239          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 240  
 241          if ($tag == 'body')
 242          {
 243              if( count($this->stack) > 0 )
 244                  $this->html_error( T_('Tag <code>body</code> can only be used once!') );
 245              $this->stack[] = $tag;
 246              return;
 247          }
 248          $previous = $this->stack[count($this->stack)-1];
 249  
 250          // If previous tag is illegal, no point in running tests
 251          if (!in_array($previous, array_keys($this->tags))) {
 252              $this->stack[] = $tag;
 253              return;
 254          }
 255          // Is tag a legal tag?
 256          if (!in_array($tag, array_keys($this->tags))) {
 257              $this->html_error( T_('Illegal tag'). ": <code>$tag</code>" );
 258              $this->stack[] = $tag;
 259              return;
 260          }
 261          // Is tag allowed in the current context?
 262          if (!in_array($tag, explode(' ', $this->tags[$previous]))) {
 263              if ($previous == 'body') {
 264                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; must occur inside another tag'), '<code>'.$tag.'</code>' ) );
 265              } else {
 266                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; is not allowed within tag &lt;%s&gt;'), '<code>'.$tag.'</code>', '<code>'.$previous.'</code>') );
 267              }
 268          }
 269          // Are tag attributes valid?
 270          foreach( $attrs as $attr => $value )
 271          {
 272              if (!isset($this->tagattrs[$tag]) || !in_array($attr, explode(' ', $this->tagattrs[$tag])))
 273              {
 274                  $this->html_error( sprintf( T_('Tag &lt;%s&gt; may not have attribute %s="..."'), '<code>'.$tag.'</code>', '<code>'.$attr.'</code>' ) );
 275              }
 276  
 277              if (in_array($attr, $this->uri_attrs))
 278              { // This attribute must be checked for URIs
 279                  $matches = array();
 280                  $value = trim($value);
 281                  if( $error = validate_url( $value, $this->context, false ) ) //Note: We do not check for spam here, should be done on whole message in check_html_sanity()
 282                  {
 283                      $this->html_error( T_('Found invalid URL: ').$error );
 284                  }
 285              }
 286  
 287          }
 288          // Set previous, used for checking nesting context rules
 289          $this->stack[] = $tag;
 290      }
 291  
 292      /**
 293       * cdata(-)
 294       */
 295  	function cdata($parser, $cdata)
 296      {
 297          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 298  
 299          // Simply check that the 'previous' tag allows CDATA
 300          $previous = $this->stack[count($this->stack)-1];
 301          // If previous tag is illegal, no point in running test
 302          if (!in_array($previous, array_keys($this->tags))) {
 303              return;
 304          }
 305          if (trim($cdata) != '') {
 306              if (!in_array('#PCDATA', explode(' ', $this->tags[$previous]))) {
 307                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; may not contain raw character data'), '<code>'.$previous.'</code>' ) );
 308              }
 309          }
 310      }
 311  
 312      /**
 313       * tag_close(-)
 314       */
 315  	function tag_close($parser, $tag)
 316      {
 317          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 318  
 319          // Move back one up the stack
 320          array_pop($this->stack);
 321      }
 322  
 323  	function html_error( $string )
 324      {
 325          global $Messages;
 326          $this->error = true;
 327          $Messages->add( $string, $this->msg_type );
 328      }
 329  
 330      /**
 331       * isOK(-)
 332       */
 333  	function isOK()
 334      {
 335          return ! $this->error;
 336      }
 337  
 338  }
 339  
 340  ?>

title

Description

title

Description

title

Description

title

title

Body