b2evolution PHP Cross Reference Blogging Systems

Source: /plugins/basic_antispam_plugin/_basic_antispam.plugin.php - 645 lines - 17518 bytes - Summary - Text - Print

Description: This file implements the basic Antispam plugin. This file is part of the b2evolution project - {@link http://b2evolution.net/}

   1  <?php
   2  /**
   3   * This file implements the basic Antispam plugin.
   4   *
   5   * This file is part of the b2evolution project - {@link http://b2evolution.net/}
   6   *
   7   * @copyright (c)2003-2014 by Francois Planque - {@link http://fplanque.com/}
   8   * Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link http://thequod.de/contact}.
   9   *
  10   * {@internal License choice
  11   * - If you have received this file as part of a package, please find the license.txt file in
  12   *   the same folder or the closest folder above for complete license terms.
  13   * - If you have received this file individually (e-g: from http://evocms.cvs.sourceforge.net/)
  14   *   then you must choose one of the following licenses before using the file:
  15   *   - GNU General Public License 2 (GPL) - http://www.opensource.org/licenses/gpl-license.php
  16   *   - Mozilla Public License 1.1 (MPL) - http://www.opensource.org/licenses/mozilla1.1.php
  17   * }}
  18   *
  19   * {@internal Open Source relicensing agreement:
  20   * Daniel HAHLER grants Francois PLANQUE the right to license
  21   * Daniel HAHLER's contributions to this file and the b2evolution project
  22   * under any OSI approved OSS license (http://www.opensource.org/licenses/).
  23   * }}
  24   *
  25   * @package plugins
  26   *
  27   * {@internal Below is a list of authors who have contributed to design/coding of this file: }}
  28   * @author blueyed: Daniel HAHLER - {@link http://daniel.hahler.de/}
  29   *
  30   * @version $Id: _basic_antispam.plugin.php 6136 2014-03-08 07:59:48Z manuel $
  31   */
  32  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
  33  
  34  
  35  /**
  36   * Basic Antispam Plugin
  37   *
  38   * This plugin doublechecks referers/referrers for Hit logging and trackbacks.
  39   *
  40   * @todo Ideas:
  41   *  - forbid cloned comments (same content) (on the same entry or all entries)
  42   *  - detect same/similar URLs in a short period (also look at author name: if it differs, it's more likely to be spam)
  43   */
  44  class basic_antispam_plugin extends Plugin
  45  {
  46      /**
  47       * Variables below MUST be overridden by plugin implementations,
  48       * either in the subclass declaration or in the subclass constructor.
  49       */
  50      var $name = 'Basic Antispam';
  51      var $code = '';
  52      var $priority = 60;
  53      var $version = '5.0.0';
  54      var $author = 'The b2evo Group';
  55      var $group = 'antispam';
  56      var $number_of_installs = 1;
  57  
  58  
  59      /**
  60       * Init
  61       */
  62  	function PluginInit( & $params )
  63      {
  64          $this->short_desc = T_('Basic antispam methods');
  65          $this->long_desc = T_('This plugin provides basic methods to detect & block spam on referers, comments & trackbacks.');
  66      }
  67  
  68  
  69  	function GetDefaultSettings()
  70      {
  71          return array(
  72                  'check_dupes' => array(
  73                      'type' => 'checkbox',
  74                      'label' => T_('Detect feedback duplicates'),
  75                      'note' => T_('Check this to check comments and trackback for duplicate content.'),
  76                      'defaultvalue' => '1',
  77                  ),
  78                  'max_number_of_links_feedback' => array(
  79                      'type' => 'integer',
  80                      'label' => T_('Feedback sensitivity to links'),
  81                      'note' => T_('If a comment has more than this number of links in it, it will get 100 percent spam karma. -1 to disable it.'),
  82                      'help' => '#set_max_number_of_links',
  83                      'defaultvalue' => '4',
  84                      'size' => 3,
  85                  ),
  86                  'trim_whitespace' => array(
  87                      'type' => 'checkbox',
  88                      'label' => T_('Strip whitespace'),
  89                      'note' => T_('Strip whitespace from the beginning and end of comment content.'),
  90                      'defaultvalue' => 1,
  91                  ),
  92                  'remove_repetitions' => array(
  93                      'type' => 'checkbox',
  94                      'label' => T_('Remove repetitive characters'),
  95                      'note'=>T_('Remove repetitive characters in name and content. The string like "Thaaaaaaaaaanks!" becomes "Thaaanks!".'),
  96                      'defaultvalue' => 0,
  97                  ),
  98                  'block_common_spam' => array(
  99                      'type' => 'checkbox',
 100                      'label' => T_('Block common spam comments'),
 101                      'note'=>T_('Block comments with both "[link=" and "[url=" tags.'),
 102                      'defaultvalue' => 1,
 103                  ),
 104                  'nofollow_for_hours' => array(
 105                      'type' => 'integer',
 106                      'label' => T_('Apply rel="nofollow"'),
 107                      'note'=>T_('hours. For how long should rel="nofollow" be applied to comment links? (0 means never, -1 means always)'),
 108                      'defaultvalue' => '-1', // use "nofollow" infinitely by default so lazy admins won't promote spam
 109                      'size' => 5,
 110                  ),
 111                  'check_url_referers' => array(
 112                      'type' => 'checkbox',
 113                      'label' => T_('Check referers for URL'),
 114                      'note' => T_('Check refering pages, if they contain our URL. This may generate a lot of additional traffic!'),
 115                      'defaultvalue' => '0',
 116                  ),
 117  
 118              );
 119      }
 120  
 121  
 122      /**
 123       * Handle max_number_of_links_feedback setting.
 124       *
 125       * Try to detect as many links as possible
 126       */
 127  	function GetSpamKarmaForComment( & $params )
 128      {
 129          $max_comments = $this->Settings->get('max_number_of_links_feedback');
 130          if( $max_comments != -1 )
 131          { // not deactivated:
 132              $count = preg_match_all( '~(https?|ftp)://~i', $params['Comment']->content, $matches );
 133  
 134              if( $count > $max_comments )
 135              {
 136                  return 100;
 137              }
 138  
 139              if( $count == 0 )
 140              {
 141                  return 0;
 142              }
 143  
 144              return (100/$max_comments) * $count;
 145          }
 146      }
 147  
 148  
 149      /**
 150       * Disable/Enable events according to settings.
 151       *
 152       * "AppendHitLog" gets enabled according to check_url_referers setting.
 153       * "BeforeTrackbackInsert" gets disabled, if we do not check for duplicate content.
 154       */
 155  	function BeforeEnable()
 156      {
 157          if( $this->Settings->get('check_url_referers') )
 158          {
 159              $this->enable_event( 'AppendHitLog' );
 160          }
 161          else
 162          {
 163              $this->disable_event( 'AppendHitLog' );
 164          }
 165  
 166          if( ! $this->Settings->get('check_dupes') )
 167          {
 168              $this->disable_event( 'BeforeTrackbackInsert' );
 169          }
 170          else
 171          {
 172              $this->enable_event( 'BeforeTrackbackInsert' );
 173          }
 174  
 175          return true;
 176      }
 177  
 178  
 179      /**
 180       * - Check for duplicate trackbacks.
 181       */
 182  	function BeforeTrackbackInsert( & $params )
 183      {
 184          if( $this->is_duplicate_comment( $params['Comment'] ) )
 185          {
 186              $this->msg( T_('The trackback seems to be a duplicate.'), 'error' );
 187          }
 188      }
 189  
 190  
 191  	function CommentFormSent( & $params )
 192      {
 193          if( $this->Settings->get('trim_whitespace') )
 194          {    // Strip whitespace
 195              $params['comment'] = trim( $params['comment'] );
 196          }
 197  
 198          if( $this->Settings->get('remove_repetitions') )
 199          {    // Remove repetitions
 200              $params['anon_name'] = $this->remove_repetition( $params['anon_name'] );
 201              $params['comment'] = $this->remove_repetition( $params['comment'] );
 202          }
 203      }
 204  
 205  
 206      /**
 207       * Check for duplicate comments.
 208       */
 209  	function BeforeCommentFormInsert( & $params )
 210      {
 211          if( $this->is_duplicate_comment( $params['Comment'] ) )
 212          {
 213              $this->msg( T_('The comment seems to be a duplicate.'), 'error' );
 214          }
 215  
 216          if( $this->Settings->get('block_common_spam') && preg_match_all( '~\[(link|url)=~', $params['Comment']->content, $m ) )
 217          {    // Block common bbcode spam comments with both [url= and [link= tags
 218              if( !empty($m[1]) && count($m[1]) > 1 )
 219              {
 220                  $this->msg( T_('Your comment was rejected because it appeared to be spam.'), 'error' );
 221              }
 222          }
 223      }
 224  
 225  
 226      /**
 227       * If we use "makelink", handle nofollow rel attrib.
 228       *
 229       * @uses basic_antispam_plugin::apply_nofollow()
 230       */
 231  	function FilterCommentAuthor( & $params )
 232      {
 233          if( ! $params['makelink'] )
 234          {
 235              return false;
 236          }
 237  
 238          $this->apply_nofollow( $params['data'], $params['Comment'] );
 239      }
 240  
 241  
 242      /**
 243       * Handle nofollow in author URL (if it's made clickable)
 244       *
 245       * @uses basic_antispam_plugin::FilterCommentAuthor()
 246       */
 247  	function FilterCommentAuthorUrl( & $params )
 248      {
 249          $this->FilterCommentAuthor( $params );
 250      }
 251  
 252  
 253      /**
 254       * Handle nofollow rel attrib in comment content.
 255       *
 256       * @uses basic_antispam_plugin::FilterCommentAuthor()
 257       */
 258  	function FilterCommentContent( & $params )
 259      {
 260          $this->apply_nofollow( $params['data'], $params['Comment'] );
 261      }
 262  
 263  
 264      /**
 265       * Do we want to apply rel="nofollow" tag?
 266       *
 267       * @return boolean
 268       */
 269  	function apply_nofollow( & $data, $Comment )
 270      {
 271          global $localtimenow;
 272  
 273          $hours = $this->Settings->get('nofollow_for_hours'); // 0=never, -1 always, otherwise for x hours
 274  
 275          if( $hours == 0 )
 276          { // "never"
 277              return;
 278          }
 279  
 280          if( $hours > 0 // -1 is "always"
 281              && mysql2timestamp( $Comment->date ) <= ( $localtimenow - $hours*3600 ) )
 282          {
 283              return;
 284          }
 285  
 286          $data = preg_replace_callback( '~(<a\s)([^>]+)>~i', create_function( '$m', '
 287                  if( preg_match( \'~\brel=([\\\'"])(.*?)\1~\', $m[2], $match ) )
 288                  { // there is already a rel attrib:
 289                      $rel_values = explode( " ", $match[2] );
 290  
 291                      if( ! in_array( \'nofollow\', $rel_values ) )
 292                      {
 293                          $rel_values[] = \'nofollow\';
 294                      }
 295  
 296                      return $m[1]
 297                          .preg_replace(
 298                              \'~\brel=([\\\'"]).*?\1~\',
 299                              \'rel=$1\'.implode( " ", $rel_values ).\'$1\',
 300                              $m[2] )
 301                          .">";
 302                  }
 303                  else
 304                  {
 305                      return $m[1].$m[2].\' rel="nofollow">\';
 306                  }' ), $data );
 307      }
 308  
 309  
 310  	function remove_repetition( $str = '' )
 311      {
 312          if( ($newstring = @preg_replace( '~(.)\\1{3,}~u', '$1$1$1', $str )) === NULL )
 313          {    // Some error occured, just return the original string
 314              $newstring = $str;
 315          }
 316          return $newstring;
 317      }
 318  
 319  
 320      /**
 321       * Check if the deprecated hit_doublecheck_referer setting is set and then
 322       * do not disable the AppendHitLog event. Also removes the old setting.
 323       */
 324  	function AfterInstall()
 325      {
 326          global $Settings;
 327  
 328          if( $Settings->get('hit_doublecheck_referer') )
 329          { // old general settings, "transform it"
 330              $this->Settings->set( 'check_url_referers', '1' );
 331              $this->Settings->dbupdate();
 332          }
 333  
 334          $Settings->delete('hit_doublecheck_referer');
 335          $Settings->dbupdate();
 336      }
 337  
 338  
 339      /**
 340       * Check if our Host+URI is in the referred page, preferrably through
 341       * {@link register_shutdown_function()}.
 342       *
 343       * @return boolean true, if we handle {@link Hit::record_the_hit() recording of the Hit} ourself
 344       */
 345  	function AppendHitLog( & $params )
 346      {
 347          $Hit = & $params['Hit'];
 348  
 349          if( $Hit->referer_type != 'referer' )
 350          {
 351              return false;
 352          }
 353  
 354          if( function_exists( 'register_shutdown_function' ) )
 355          { // register it as a shutdown function, because it will be slow!
 356              $this->debug_log( 'AppendHitLog: loading referering page.. (through register_shutdown_function())' );
 357  
 358              register_shutdown_function( array( &$this, 'double_check_referer' ), $Hit->referer ); // this will also call Hit::record_the_hit()
 359          }
 360          else
 361          {
 362              // flush now, so that the meat of the page will get shown before it tries to check back against the refering URL.
 363              evo_flush();
 364  
 365              $this->debug_log( 'AppendHitLog: loading referering page..' );
 366  
 367              $this->double_check_referer($Hit->referer); // this will also call Hit::record_the_hit()
 368          }
 369  
 370          return true; // we handle recording
 371      }
 372  
 373  
 374      /**
 375       * This function gets called (as a {@link register_shutdown_function() shutdown function}, if possible) and checks
 376       * if the referering URL's content includes the current URL - if not it is probably spam!
 377       *
 378       * On success, this methods records the hit.
 379       *
 380       * @uses Hit::record_the_hit()
 381       */
 382  	function double_check_referer( $referer )
 383      {
 384          global $Hit, $ReqURI;
 385  
 386          if( $this->is_referer_linking_us( $referer, $ReqURI ) )
 387          {
 388              $Hit->record_the_hit();
 389          }
 390  
 391          return;
 392      }
 393  
 394  
 395      /**
 396       * Check the content of a given URL (referer), if the requested URI (with different hostname variations)
 397       * is present.
 398       *
 399       * @todo Use DB cache to avoid checking the same page again and again! (Plugin DB table)
 400       *
 401       * @param string
 402       * @param string URI to append to matching pattern for hostnames
 403       * @return boolean
 404       */
 405  	function is_referer_linking_us( $referer, $uri )
 406      {
 407          global $misc_inc_path, $lib_subdir, $ReqHost;
 408  
 409          if( empty($referer) )
 410          {
 411              return false;
 412          }
 413  
 414          // Load page content (max. 500kb), using fsockopen:
 415          $url_parsed = @parse_url($referer);
 416          if( ! $url_parsed )
 417          {
 418              return false;
 419          }
 420          if( empty($url_parsed['scheme']) ) {
 421              $url_parsed = parse_url('http://'.$referer);
 422          }
 423  
 424          $host = $url_parsed['host'];
 425          $port = ( empty($url_parsed['port']) ? 80 : $url_parsed['port'] );
 426          $path = empty($url_parsed['path']) ? '/' : $url_parsed['path'];
 427          if( ! empty($url_parsed['query']) )
 428          {
 429              $path .= '?'.$url_parsed['query'];
 430          }
 431  
 432          $fp = @fsockopen($host, $port, $errno, $errstr, 30);
 433          if( ! $fp )
 434          { // could not access referring page
 435              $this->debug_log( 'is_referer_linking_us(): could not access &laquo;'.$referer.'&raquo; (host: '.$host.'): '.$errstr.' (#'.$errno.')' );
 436              return false;
 437          }
 438  
 439          // Set timeout for data:
 440          if( function_exists('stream_set_timeout') )
 441              stream_set_timeout( $fp, 20 ); // PHP 4.3.0
 442          else
 443              socket_set_timeout( $fp, 20 ); // PHP 4
 444  
 445          // Send request:
 446          $out = "GET $path HTTP/1.0\r\n";
 447          $out .= "Host: $host:$port\r\n";
 448          $out .= "Connection: Close\r\n\r\n";
 449          fwrite($fp, $out);
 450  
 451          // Skip headers:
 452          $i = 0;
 453          $source_charset = 'iso-8859-1'; // default
 454          while( ($s = fgets($fp, 4096)) !== false )
 455          {
 456              $i++;
 457              if( $s == "\r\n" || $i > 100 /* max 100 head lines */ )
 458              {
 459                  break;
 460              }
 461              if( preg_match('~^Content-Type:.*?charset=([\w-]+)~i', $s, $match ) )
 462              {
 463                  $source_charset = $match[1];
 464              }
 465          }
 466  
 467          // Get the refering page's content
 468          $content_ref_page = '';
 469          $bytes_read = 0;
 470          while( ($s = fgets($fp, 4096)) !== false )
 471          {
 472              $content_ref_page .= $s;
 473              $bytes_read += strlen($s);
 474              if( $bytes_read > 512000 )
 475              { // do not pull more than 500kb of data!
 476                  break;
 477              }
 478          }
 479          fclose($fp);
 480  
 481          if( ! strlen($content_ref_page) )
 482          {
 483              $this->debug_log( 'is_referer_linking_us(): empty $content_ref_page ('.bytesreadable($bytes_read).' read)' );
 484              return false;
 485          }
 486  
 487  
 488          $have_idn_name = false;
 489  
 490          // Build the search pattern:
 491          // We match for basically for 'href="[SERVER][URI]', where [SERVER] is a list of possible hosts (especially IDNA)
 492          $search_pattern = '~\shref=["\']?https?://(';
 493          $possible_hosts = array( $_SERVER['HTTP_HOST'] );
 494          if( $_SERVER['SERVER_NAME'] != $_SERVER['HTTP_HOST'] )
 495          {
 496              $possible_hosts[] = $_SERVER['SERVER_NAME'];
 497          }
 498          $search_pattern_hosts = array();
 499          foreach( $possible_hosts as $l_host )
 500          {
 501              if( preg_match( '~^([^.]+\.)(.*?)([^.]+\.[^.]+)$~', $l_host, $match ) )
 502              { // we have subdomains in this hostname
 503                  if( stristr( $match[1], 'www' ) )
 504                  { // search also for hostname without 'www.'
 505                      $search_pattern_hosts[] = $match[2].$match[3];
 506                  }
 507              }
 508              $search_pattern_hosts[] = $l_host;
 509          }
 510          $search_pattern_hosts = array_unique($search_pattern_hosts);
 511          foreach( $search_pattern_hosts as $l_host )
 512          { // add IDN, because this could be linked:
 513              $l_idn_host = idna_decode( $l_host ); // the decoded puny-code ("xn--..") name (utf8)
 514  
 515              if( $l_idn_host != $l_host )
 516              {
 517                  $have_idn_name = true;
 518                  $search_pattern_hosts[] = $l_idn_host;
 519              }
 520          }
 521  
 522          // add hosts to pattern, preg_quoted
 523          for( $i = 0, $n = count($search_pattern_hosts); $i < $n; $i++ )
 524          {
 525              $search_pattern_hosts[$i] = preg_quote( $search_pattern_hosts[$i], '~' );
 526          }
 527          $search_pattern .= implode( '|', $search_pattern_hosts ).')';
 528          if( empty($uri) )
 529          { // host(s) should end with "/", "'", '"', "?" or whitespace
 530              $search_pattern .= '[/"\'\s?]';
 531          }
 532          else
 533          {
 534              $search_pattern .= preg_quote($uri, '~');
 535              // URI should end with "'", '"' or whitespace
 536              $search_pattern .= '["\'\s]';
 537          }
 538          $search_pattern .= '~i';
 539  
 540          if( $have_idn_name )
 541          { // Convert charset to UTF-8, because the decoded domain name is UTF-8, too:
 542              if( can_convert_charsets( 'utf-8', $source_charset ) )
 543              {
 544                  $content_ref_page = convert_charset( $content_ref_page, 'utf-8', $source_charset );
 545              }
 546              else
 547              {
 548                  $this->debug_log( 'is_referer_linking_us(): warning: cannot convert charset of referring page' );
 549              }
 550          }
 551  
 552          if( preg_match( $search_pattern, $content_ref_page ) )
 553          {
 554              $this->debug_log( 'is_referer_linking_us(): found current URL in page ('.bytesreadable($bytes_read).' read)' );
 555  
 556              return true;
 557          }
 558          else
 559          {
 560              if( strpos( $referer, $ReqHost ) === 0 && ! empty($uri) )
 561              { // Referer is the same host.. just search for $uri
 562                  if( strpos( $content_ref_page, $uri ) !== false )
 563                  {
 564                      $this->debug_log( 'is_referer_linking_us(): found current URI in page ('.bytesreadable($bytes_read).' read)' );
 565  
 566                      return true;
 567                  }
 568              }
 569              $this->debug_log( 'is_referer_linking_us(): '.sprintf('did not find &laquo;%s&raquo; in &laquo;%s&raquo; (%s bytes read).', $search_pattern, $referer, bytesreadable($bytes_read) ) );
 570  
 571              return false;
 572          }
 573      }
 574  
 575  
 576      /**
 577       * Simple check for duplicate comment/content from same author
 578       *
 579       * @param Comment
 580       */
 581  	function is_duplicate_comment( $Comment )
 582      {
 583          global $DB;
 584  
 585          if( ! $this->Settings->get('check_dupes') )
 586          {
 587              return false;
 588          }
 589  
 590          if( $Comment->content == '' )
 591          { // User may has many comments with empty content but with attachment pictures
 592              return false;
 593          }
 594  
 595          $sql = '
 596                  SELECT comment_ID
 597                    FROM T_comments
 598                   WHERE comment_post_ID = '.$Comment->item_ID;
 599  
 600          if( isset($Comment->author_user_ID) )
 601          { // registered user:
 602              $sql .= ' AND comment_author_ID = '.$Comment->author_user_ID;
 603          }
 604          else
 605          { // visitor (also trackback):
 606              $sql_ors = array();
 607              if( ! empty($Comment->author) )
 608              {
 609                  $sql_ors[] = 'comment_author = '.$DB->quote($Comment->author);
 610              }
 611              if( ! empty($Comment->author_email) )
 612              {
 613                  $sql_ors[] = 'comment_author_email = '.$DB->quote($Comment->author_email);
 614              }
 615              if( ! empty($Comment->author_url) )
 616              {
 617                  $sql_ors[] = 'comment_author_url = '.$DB->quote($Comment->author_url);
 618              }
 619  
 620              if( ! empty($sql_ors) )
 621              {
 622                  $sql .= ' AND ( '.implode( ' OR ', $sql_ors ).' )';
 623              }
 624          }
 625  
 626          $sql .= ' AND comment_content = '.$DB->quote($Comment->content).' LIMIT 1';
 627  
 628          return $DB->get_var( $sql, 0, 0, 'Checking for duplicate feedback content.' );
 629      }
 630  
 631  
 632      /**
 633       * A little housekeeping.
 634       * @return true
 635       */
 636  	function PluginVersionChanged( & $params )
 637      {
 638          $this->Settings->delete('check_url_trackbacks');
 639          $this->Settings->dbupdate();
 640          return true;
 641      }
 642  
 643  }
 644  
 645  ?>

title

Description

title

Description

title

Description

title

title

Body