b2evolution PHP Cross Reference Blogging Systems

Source: /inc/xhtml_validator/_xhtml_dtd.inc.php - 568 lines - 14298 bytes - Text - Print

Description: This file implements the XHTML "DTD" for the validator. Checks HTML against a subset of elements to ensure safety and XHTML validation.

   1  <?php
   2  /**
   3   * This file implements the XHTML "DTD" for the validator.
   4   *
   5   * Checks HTML against a subset of elements to ensure safety and XHTML validation.
   6   *
   7   * This file is part of the b2evolution/evocms project - {@link http://b2evolution.net/}.
   8   * See also {@link http://sourceforge.net/projects/evocms/}.
   9   *
  10   * @copyright (c)2003-2014 by Francois Planque - {@link http://fplanque.com/}.
  11   *
  12   * @license http://b2evolution.net/about/license.html GNU General Public License (GPL)
  13   *
  14   * @package evocore
  15   *
  16   * @version $Id: _xhtml_dtd.inc.php 6136 2014-03-08 07:59:48Z manuel $
  17   */
  18  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
  19  
  20  global $use_strict;
  21  global $comments_allow_css_tweaks;
  22  
  23  /*
  24   * HTML Checker params:
  25   *
  26   * The params are defined twice: once for the posts and once for the comments.
  27   * Typically you'll be more restrictive on comments.
  28   *
  29   * Adapted from XHTML-1.0-Transitional/Strict by fplanque
  30   * http://www.w3.org/TR/2002/REC-xhtml1-20020801/dtds.html#a_dtd_XHTML-1.0-Strict
  31   */
  32  
  33  // DEFINITION of allowed XHTML code for POSTS (posted in the backoffice)
  34  
  35  /**
  36   * Allowed Entity classes
  37   */
  38  if( $allow_javascript )
  39  {
  40      $E_script_tags = 'script noscript';
  41  }
  42  else
  43  {
  44      $E_script_tags = '';
  45  }
  46  
  47  if( $use_strict )
  48  { // Strict
  49      $E_special_pre = 'br span bdo';
  50      $E_special = $E_special_pre.' img '.$E_script_tags;
  51  }
  52  else
  53  { // Transitional
  54      $E_special_extra = 'img';
  55      $E_special_basic = 'br span bdo';
  56      $E_special = $E_special_basic.' '.$E_special_extra.' '.$E_script_tags;
  57  }
  58  
  59  if( $use_strict )
  60  {
  61      $E_fontstyle = 'tt i b big small';                // Strict
  62  }
  63  else
  64  {
  65      $E_fontstyle_extra = 'big small font';            // Transitional
  66      $E_fontstyle_basic = 'tt i b u s strike';    // Transitional
  67      $E_fontstyle = $E_fontstyle_basic.' '.$E_fontstyle_extra;    // Transitional
  68  }
  69  
  70  if( $use_strict )
  71  {
  72      $E_phrase = 'em strong dfn code q samp kbd var cite abbr acronym sub sup'; // Strict
  73  }
  74  else
  75  {
  76      $E_phrase_extra = 'sub sup';                                                                                             // Transitional
  77      $E_phrase_basic = 'em strong dfn code q samp kbd var cite abbr acronym';    // Transitional
  78      $E_phrase = $E_phrase_basic.' '.$E_phrase_extra;                                                     // Transitional
  79  }
  80  
  81  $E_misc_inline = 'ins del';
  82  $E_misc = $E_misc_inline;
  83  $E_inline = 'a '.$E_special.' '.$E_fontstyle.' '.$E_phrase;
  84  $E_Iinline = '#PCDATA '.$E_inline.' '.$E_misc_inline;
  85  $E_heading = 'h1 h2 h3 h4 h5 h6';
  86  
  87  if( $use_strict )
  88  {
  89      $E_list = 'ul ol dl';                // Strict
  90  }
  91  else
  92  {
  93      $E_list = 'ul ol dl menu dir';    // Transitional
  94  }
  95  
  96  if( $use_strict )
  97  {
  98      $E_blocktext = 'pre hr blockquote address';            // Strict
  99  }
 100  else
 101  {
 102      $E_blocktext = 'pre hr blockquote address center';    // Transitional
 103  }
 104  
 105  if( $allow_iframes )
 106  {
 107      $E_block = 'p '.$E_heading.' div '.$E_list.' '.$E_blocktext.' fieldset table iframe';
 108  }
 109  else
 110  {
 111      $E_block = 'p '.$E_heading.' div '.$E_list.' '.$E_blocktext.' fieldset table';
 112  }
 113  
 114  if( $use_strict )
 115  {
 116      $E_Bblock = $E_block.' '.$E_misc;            // Strict only
 117  }
 118  
 119  if( $allow_objects )
 120  {
 121      $E_Flow = '#PCDATA '.$E_block.' '.$E_inline.' '.$E_misc.' object embed';
 122  }
 123  else
 124  {
 125      $E_Flow = '#PCDATA '.$E_block.' '.$E_inline.' '.$E_misc ;
 126  }
 127  $E_a_content = '#PCDATA '.$E_special.' '.$E_fontstyle.' '.$E_phrase.' '.$E_misc_inline;
 128  
 129  if( $use_strict )
 130  {
 131      $E_pre_content = '#PCDATA a '.$E_fontstyle.' '.$E_phrase.' '.$E_special_pre.' '.$E_misc_inline; // Strict
 132  }
 133  else
 134  {
 135      $E_pre_content = '#PCDATA a '.$E_special_basic.' '.$E_fontstyle_basic.' '.$E_phrase_basic.' '.$E_misc_inline; // Transitional
 136  }
 137  
 138  // Allowed Attribute classes
 139  // TODO: individual checkboxes for class / style / id
 140  $A_coreattrs = 'class title'.( $allow_css_tweaks ? ' style' : '' )                    // 'id' is really nasty
 141                      .( $allow_javascript ? ' onmouseover onmouseout onclick' : '' );
 142  $A_i18n = 'lang xml:lang dir';
 143  $A_attrs = $A_coreattrs.' '.$A_i18n;
 144  
 145  if( !$use_strict )
 146  {
 147      $A_TextAlign = 'align';                                    // Transitional only
 148  }
 149  else
 150  {
 151      $A_TextAlign = '';
 152  }
 153  
 154  $A_cellhalign = 'align char charoff';
 155  $A_cellvalign = 'valign';
 156  
 157  // Array showing what tags are allowed and what their allowed subtags are.
 158  $allowed_tags = array
 159  (
 160      'body' => $E_Flow, // Remember this is not a true body, just a post body
 161      'div' => $E_Flow,
 162      'p' => $E_Iinline,
 163      'h1' => $E_Iinline,
 164      'h2' => $E_Iinline,
 165      'h3' => $E_Iinline,
 166      'h4' => $E_Iinline,
 167      'h5' => $E_Iinline,
 168      'h6' => $E_Iinline,
 169      'ul' => 'li',
 170      'ol' => 'li',
 171  );
 172  
 173  if( !$use_strict )
 174  {
 175      $allowed_tags += array
 176      (
 177          'menu' => 'li',        // Transitional only
 178          'dir' => 'li',        // Transitional only
 179      );
 180  }
 181  
 182  $allowed_tags += array
 183  (
 184      'li' => $E_Flow,
 185      'dl' => 'dt dd',
 186      'dt' => $E_Iinline,
 187      'dd' => $E_Flow,
 188  );
 189  
 190  if( $use_strict )
 191  {
 192      $allowed_tags += array
 193      (
 194          'address' => $E_Iinline,                                                        // Strict
 195      );
 196  }
 197  else
 198  {
 199      $allowed_tags += array
 200      (
 201          'address' => '#PCDATA '.$E_inline.' '.$E_misc_inline,        // Transitional
 202      );
 203  }
 204  
 205  $allowed_tags += array
 206      (
 207          'hr' => '',
 208          'pre' => $E_pre_content,
 209      );
 210  
 211  if( $use_strict )
 212  {
 213      $allowed_tags += array
 214      (
 215          'blockquote' => $E_Bblock,        // Strict
 216      );
 217  }
 218  else
 219  {
 220      $allowed_tags += array
 221      (
 222          'blockquote' => $E_Flow,                    // Transitional
 223          'center' => $E_Flow,                    // Transitional only
 224      );
 225  }
 226  
 227  $allowed_tags += array
 228  (
 229      'ins' => $E_Flow,
 230      'del' => $E_Flow,
 231      'a' => $E_a_content,
 232      'span' => $E_Iinline,
 233      'bdo' => $E_Iinline,
 234      'br' => '',
 235      'em' => $E_Iinline,
 236      'strong' => $E_Iinline,
 237      'dfn' => $E_Iinline,
 238      'code' => $E_Iinline,
 239      'samp' => $E_Iinline,
 240      'kbd' => $E_Iinline,
 241      'var' => $E_Iinline,
 242      'cite' => $E_Iinline,
 243      'abbr' => $E_Iinline,
 244      'acronym' => $E_Iinline,
 245      'q' => $E_Iinline,
 246      'sub' => $E_Iinline,
 247      'sup' => $E_Iinline,
 248      'tt' => $E_Iinline,
 249      'i' => $E_Iinline,
 250      'b' => $E_Iinline,
 251      'big' => $E_Iinline,
 252      'small' => $E_Iinline,
 253  );
 254  
 255  if( !$use_strict )
 256  {
 257      $allowed_tags += array
 258      (
 259          'u' => $E_Iinline,                        // Transitional only
 260          's' => $E_Iinline,                        // Transitional only
 261          'strike' => $E_Iinline,            // Transitional only
 262          'font' => $E_Iinline,                // Transitional only
 263      );
 264  }
 265  
 266  $allowed_tags += array
 267  (
 268      'img' => '',
 269      'fieldset' => '#PCDATA legend '.$E_block.' '.$E_inline.' '.$E_misc,
 270      'legend' => $E_Iinline,
 271      'table' => 'caption col colgroup thead tfoot tbody tr',
 272      'caption' => $E_Iinline,
 273      'thead' => 'tr',
 274      'tfoot' => 'tr',
 275      'tbody' => 'tr',
 276      'colgroup' => 'col',
 277      'tr' => 'th td',
 278      'th' => $E_Flow,
 279      'td' => $E_Flow,
 280  );
 281  
 282  if( $allow_javascript )
 283  {
 284      $allowed_tags += array
 285      (
 286          'script' => '#PCDATA',
 287          'noscript' => $E_Flow,
 288      );
 289  }
 290  
 291  // Array showing allowed attributes for tags
 292  if( $use_strict )
 293  {
 294      $allowed_attributes = array
 295      (    // Strict
 296          'div' => $A_attrs,
 297          'p' => $A_attrs,
 298          'h1' => $A_attrs,
 299          'h2' => $A_attrs,
 300          'h3' => $A_attrs,
 301          'h4' => $A_attrs,
 302          'h5' => $A_attrs,
 303          'h6' => $A_attrs,
 304          'ul' => $A_attrs,
 305          'ol' => $A_attrs,
 306          'li' => $A_attrs,
 307          'dl' => $A_attrs,
 308          'hr' => $A_attrs,
 309          'pre' => $A_attrs.' xml:space',
 310          'a' => $A_attrs.' charset type href hreflang rel rev shape coords name',
 311          'br' => $A_coreattrs,
 312          'img' => $A_attrs.' src alt longdesc height width usemap ismap',
 313          'legend' => $A_attrs,
 314          'table' => $A_attrs.' summary width border frame rules cellspacing cellpadding',
 315          'caption' => $A_attrs,
 316          'tr' => $A_attrs.' '.$A_cellhalign.' '.$A_cellvalign,
 317          'th' => $A_attrs.' abbr axis headers scope rowspan colspan '.$A_cellhalign.' '.$A_cellvalign,
 318          'td' => $A_attrs.' abbr axis headers scope rowspan colspan '.$A_cellhalign.' '.$A_cellvalign,
 319      );
 320  }
 321  else
 322  {
 323      $allowed_attributes = array
 324      (    // Transitional
 325          'div' => $A_attrs.' '.$A_TextAlign,
 326          'p' => $A_attrs.' '.$A_TextAlign,
 327          'h1' => $A_attrs.' '.$A_TextAlign,
 328          'h2' => $A_attrs.' '.$A_TextAlign,
 329          'h3' => $A_attrs.' '.$A_TextAlign,
 330          'h4' => $A_attrs.' '.$A_TextAlign,
 331          'h5' => $A_attrs.' '.$A_TextAlign,
 332          'h6' => $A_attrs.' '.$A_TextAlign,
 333          'ul' => $A_attrs.' type compact',
 334          'ol' => $A_attrs.' type compact start',
 335          'menu' => $A_attrs.' compact',            // Transitional only
 336          'dir' => $A_attrs.' compact',            // Transitional only
 337          'li' => $A_attrs.' type value',
 338          'dl' => $A_attrs.' compact',
 339          'hr' => $A_attrs.' align noshade size width',
 340          'pre' => $A_attrs.' width xml:space',
 341          'center' => $A_attrs,                    // Transitional only
 342          // sam2kb> TODO: 'name' is deprecated by 'id', we should allow 'id' in <a> tags without 'href' attribute
 343          'a' => $A_attrs.' charset type href hreflang rel rev shape coords target name',
 344          'br' => $A_coreattrs.' clear',
 345          'u' => $A_attrs,                        // Transitional only
 346          's' => $A_attrs,                        // Transitional only
 347          'strike' => $A_attrs,                    // Transitional only
 348          'font' => $A_coreattrs.' '.$A_i18n.' size color face',    // Transitional only
 349          'img' => $A_attrs.' src alt name longdesc height width usemap ismap align border hspace vspace',
 350          'legend' => $A_attrs.' align',
 351          'table' => $A_attrs.' summary width border frame rules cellspacing cellpadding align bgcolor',
 352          'caption' => $A_attrs.' align',
 353          'tr' => $A_attrs.' '.$A_cellhalign.' '.$A_cellvalign.' bgcolor',
 354          'th' => $A_attrs.' abbr axis headers scope rowspan colspan '.$A_cellhalign.' '.$A_cellvalign.' nowrap bgcolor width height',
 355          'td' => $A_attrs.' abbr axis headers scope rowspan colspan '.$A_cellhalign.' '.$A_cellvalign.' nowrap bgcolor width height',
 356      );
 357  }
 358  $allowed_attributes += array
 359  (
 360      'fieldset' => $A_attrs,
 361  
 362      'ins' => $A_attrs.' cite datetime',
 363      'del' => $A_attrs.' cite datetime',
 364      'blockquote' => $A_attrs.' cite',
 365      'span' => $A_attrs,
 366      'bdo' => $A_coreattrs.' lang xml:lang dir',
 367      'dt' => $A_attrs,
 368      'dd' => $A_attrs,
 369  
 370      'address' => $A_attrs,
 371  
 372      'em' => $A_attrs,
 373      'strong' => $A_attrs,
 374      'dfn' => $A_attrs,
 375      'code' => $A_attrs,
 376      'samp' => $A_attrs,
 377      'kbd' => $A_attrs,
 378      'var' => $A_attrs,
 379      'cite' => $A_attrs,
 380      'abbr' => $A_attrs,
 381      'acronym' => $A_attrs,
 382      'q' => $A_attrs.' cite',
 383      'sub' => $A_attrs,
 384      'sup' => $A_attrs,
 385      'tt' => $A_attrs,
 386      'i' => $A_attrs,
 387      'b' => $A_attrs,
 388      'big' => $A_attrs,
 389      'small' => $A_attrs,
 390      'colgroup' => $A_attrs.' span width cellhalign cellvalign',
 391      'col' => $A_attrs.' span width cellhalign cellvalign',
 392      'thead' => $A_attrs.' '.$A_cellhalign.' '.$A_cellvalign,
 393      'tfoot' => $A_attrs.' '.$A_cellhalign.' '.$A_cellvalign,
 394      'tbody' => $A_attrs.' '.$A_cellhalign.' '.$A_cellvalign,
 395  
 396  );
 397  
 398  if( $allow_javascript )
 399  {
 400      $allowed_attributes += array
 401      (
 402          'script' => 'type charset src',
 403          'noscript' => '',
 404      );
 405  }
 406  
 407  if( $allow_iframes )
 408  {
 409      $allowed_tags += array
 410      (
 411          'iframe' => '',
 412      );
 413      $allowed_attributes += array
 414      (
 415           'iframe' => $A_attrs.' '.$A_TextAlign.' src width height frameborder marginwidth marginheight scrolling',        // Transitional
 416      );
 417  }
 418  
 419  if( $allow_objects )
 420  {
 421      $allowed_tags += array
 422      (
 423          'object' => 'param embed',
 424        'param' => '',
 425        'embed' => '',
 426      );
 427      $allowed_attributes += array
 428      (
 429        'object' => 'codebase classid id height width align type data wmode',
 430        'param' => 'name value',
 431        'embed' => 'src type height width wmode quality bgcolor name align pluginspage flashvars allowfullscreen allowscriptaccess',
 432      );
 433  }
 434  
 435  
 436  
 437  // -----------------------------------------------------------------------------
 438  
 439  // DEFINITION of allowed XHTML code for COMMENTS (posted from the public blog pages)
 440  
 441  
 442  // Allowed Entity classes
 443  $C_E_special_pre = 'br span bdo';
 444  $C_E_special = $C_E_special_pre;
 445  $C_E_fontstyle = 'tt i b big small';
 446  $C_E_phrase = 'em strong dfn code q samp kbd var cite abbr acronym sub sup';
 447  $C_E_misc_inline = 'ins del';
 448  $C_E_misc = $C_E_misc_inline;
 449  $C_E_inline = 'a '.$C_E_special.' '.$C_E_fontstyle.' '.$C_E_phrase;
 450  $C_E_Iinline = '#PCDATA '.$C_E_inline.' '.$C_E_misc_inline;
 451  $C_E_heading = '';
 452  $C_E_list = 'ul ol dl';
 453  $C_E_blocktext = 'hr blockquote address';
 454  $C_E_block = 'p '.$C_E_heading.' div '.$C_E_list.' '.$C_E_blocktext.' table';
 455  $C_E_Bblock = $C_E_block.' '.$C_E_misc;
 456  $C_E_Flow = '#PCDATA '.$C_E_block.' '.$C_E_inline.' '.$C_E_misc;
 457  $C_E_a_content = '#PCDATA '.$C_E_special.' '.$C_E_fontstyle.' '.$C_E_phrase.' '.$C_E_misc_inline;
 458  $C_E_pre_content = '#PCDATA a '.$C_E_fontstyle.' '.$C_E_phrase.' '.$C_E_special_pre.' '.$C_E_misc_inline;
 459  
 460  // Allowed Attribute classes
 461  $C_A_coreattrs = 'class title'.( $comments_allow_css_tweaks ? ' style' : '' );  // 'id' is really nasty
 462  $C_A_i18n = 'lang xml:lang dir';
 463  $C_A_attrs = $C_A_coreattrs.' '.$C_A_i18n;
 464  $C_A_cellhalign = 'align char charoff';
 465  $C_A_cellvalign = 'valign';
 466  
 467  /**
 468   * Array showing what tags are allowed and what their allowed subtags are.
 469   * @global array
 470   */
 471  $comments_allowed_tags = array
 472  (
 473      'body' => $E_Flow, // Remember this is not a true body, just a comment body
 474      'p' => $C_E_Iinline,
 475      'ul' => 'li',
 476      'ol' => 'li',
 477      'li' => $C_E_Flow,
 478      'dl' => 'dt dd',
 479      'dt' => $C_E_Iinline,
 480      'dd' => $C_E_Flow,
 481      'address' => $C_E_Iinline,
 482      'hr' => '',
 483  );
 484  if( $use_strict )
 485  {
 486      $comments_allowed_tags += array
 487      (
 488          'blockquote' => $C_E_Bblock,        // XHTML-1.0-Strict
 489      );
 490  }
 491  else
 492  {
 493      $comments_allowed_tags += array
 494      (
 495          'blockquote' => $C_E_Flow,                // XHTML-1.0-Transitional
 496      );
 497  }
 498  $comments_allowed_tags += array
 499  (
 500      'ins' => $C_E_Flow,
 501      'del' => $C_E_Flow,
 502  //    'a' => $C_E_a_content,  // Allowing this will call for a whole lot of comment spam!!!
 503      'span' => $C_E_Iinline,
 504      'bdo' => $C_E_Iinline,
 505      'br' => '',
 506      'em' => $C_E_Iinline,
 507      'strong' => $C_E_Iinline,
 508      'dfn' => $C_E_Iinline,
 509      'code' => $C_E_Iinline,
 510      'samp' => $C_E_Iinline,
 511      'kbd' => $C_E_Iinline,
 512      'var' => $C_E_Iinline,
 513      'cite' => $C_E_Iinline,
 514      'abbr' => $C_E_Iinline,
 515      'acronym' => $C_E_Iinline,
 516      'q' => $C_E_Iinline,
 517      'sub' => $C_E_Iinline,
 518      'sup' => $C_E_Iinline,
 519      'tt' => $C_E_Iinline,
 520      'i' => $C_E_Iinline,
 521      'b' => $C_E_Iinline,
 522      'big' => $C_E_Iinline,
 523      'small' => $C_E_Iinline,
 524  );
 525  
 526  
 527  /**
 528   * Array showing allowed attributes for tags.
 529   * @global array
 530   */
 531  $comments_allowed_attributes = array
 532  (
 533      'p' => $C_A_attrs,
 534      'ul' => $C_A_attrs,
 535      'ol' => $C_A_attrs,
 536      'li' => $C_A_attrs,
 537      'dl' => $C_A_attrs,
 538      'dt' => $C_A_attrs,
 539      'dd' => $C_A_attrs,
 540      'address' => $C_A_attrs,
 541      'blockquote' => $C_A_attrs.' cite',
 542      'ins' => $C_A_attrs.' cite datetime',
 543      'del' => $C_A_attrs.' cite datetime',
 544      'a' => $C_A_attrs.' charset type href hreflang rel rev shape coords',
 545      'span' => $C_A_attrs,
 546      'bdo' => $C_A_coreattrs.' lang xml:lang dir',
 547      'br' => $C_A_coreattrs,
 548      'em' => $C_A_attrs,
 549      'strong' => $C_A_attrs,
 550      'dfn' => $C_A_attrs,
 551      'code' => $C_A_attrs,
 552      'samp' => $C_A_attrs,
 553      'kbd' => $C_A_attrs,
 554      'var' => $C_A_attrs,
 555      'cite' => $C_A_attrs,
 556      'abbr' => $C_A_attrs,
 557      'acronym' => $C_A_attrs,
 558      'q' => $C_A_attrs.' cite',
 559      'sub' => $C_A_attrs,
 560      'sup' => $C_A_attrs,
 561      'tt' => $C_A_attrs,
 562      'i' => $C_A_attrs,
 563      'b' => $C_A_attrs,
 564      'big' => $C_A_attrs,
 565      'small' => $C_A_attrs,
 566  );
 567  
 568  ?>

title

Description

title

Description

title

Description

title

title

Body