b2evolution PHP Cross Reference Blogging Systems

Source: /inc/locales/_charset.funcs.php - 194 lines - 6851 bytes - Summary - Text - Print

Description: This file implements functions for handling charsets. This file is part of the evoCore framework - {@link http://evocore.net/}. See also {@link http://sourceforge.net/projects/evocms/}.

   1  <?php
   2  /**
   3   * This file implements functions for handling charsets.
   4   *
   5   * This file is part of the evoCore framework - {@link http://evocore.net/}
   6   * See also {@link http://sourceforge.net/projects/evocms/}.
   7   *
   8   * @copyright (c)2003-2014 by Francois Planque - {@link http://fplanque.com/}
   9   * Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link http://daniel.hahler.de/}.
  10   *
  11   * {@internal License choice
  12   * - If you have received this file as part of a package, please find the license.txt file in
  13   *   the same folder or the closest folder above for complete license terms.
  14   * - If you have received this file individually (e-g: from http://evocms.cvs.sourceforge.net/)
  15   *   then you must choose one of the following licenses before using the file:
  16   *   - GNU General Public License 2 (GPL) - http://www.opensource.org/licenses/gpl-license.php
  17   *   - Mozilla Public License 1.1 (MPL) - http://www.opensource.org/licenses/mozilla1.1.php
  18   * }}
  19   *
  20   * {@internal Open Source relicensing agreement:
  21   * Daniel HAHLER grants Francois PLANQUE the right to license
  22   * Daniel HAHLER's contributions to this file and the b2evolution project
  23   * under any OSI approved OSS license (http://www.opensource.org/licenses/).
  24   * }}
  25   *
  26   * @package evocore
  27   *
  28   * {@internal Below is a list of authors who have contributed to design/coding of this file: }}
  29   * @author blueyed: Daniel HAHLER.
  30   * @author fplanque: Francois PLANQUE.
  31   *
  32   * @version $Id: _charset.funcs.php 6136 2014-03-08 07:59:48Z manuel $
  33   *
  34   * @todo dh> Move this to some other directory?
  35   */
  36  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
  37  
  38  
  39  /**
  40   * Use iconv() to transliterate non-ASCII chars in a string encoded with $evo_charset.
  41   *
  42   * This function will figure out a usable LC_CTYPE setting and revert it to the original value
  43   * after calling iconv().
  44   *
  45   * @author Tilman BLUMENBACH - tblue246
  46   * @todo Tblue> Try more locales.
  47   *
  48   * @param string The string to transliterate.
  49   * @param NULL|string The post locale. NULL to not try switching to it.
  50   * @return string|boolean The transliterated ASCII string on success or false on failure.
  51   */
  52  function evo_iconv_transliterate( $str, $post_locale = NULL )
  53  {
  54      global $evo_charset, $current_locale, $default_locale;
  55  
  56      if( ! function_exists( 'iconv' ) )
  57      {
  58          return false;
  59      }
  60  
  61      // iconv() needs a proper LC_CTYPE to work.
  62      // See http://www.php.net/manual/en/function.iconv.php#94481
  63      $orig_lc_ctype  = setlocale( LC_CTYPE, 0 );
  64      $lc_evo_charset = strtolower( str_replace( '-', '', $evo_charset ) );
  65  
  66      $locales_to_try = array(
  67          str_replace( '-', '_', $current_locale ).'.'.$lc_evo_charset, // Try to use current b2evo locale
  68          str_replace( '-', '_', $default_locale ).'.'.$lc_evo_charset, // Fallback to default b2evo locale
  69      );
  70      if( $post_locale !== NULL )
  71      {    // Try to switch to the post locale:
  72          array_unshift( $locales_to_try, str_replace( '-', '_', $post_locale ).'.'.$lc_evo_charset );
  73      }
  74  
  75      if( setlocale( LC_CTYPE, $locales_to_try ) === false )
  76      {    // The last thing we try is to use the system locale with our charset.
  77          if( ( $pos = strrpos( $orig_lc_ctype, '.' ) ) !== false )
  78          {    // Remove existing charset string:
  79              $syslocale = substr( $orig_lc_ctype, 0, $pos );
  80          }
  81          else
  82          {
  83              $syslocale = $orig_lc_ctype;
  84          }
  85  
  86          if( setlocale( LC_CTYPE, $syslocale.'.'.$lc_evo_charset ) === false )
  87          {    // We could not set a usable locale, giving up...
  88              return false;
  89          }
  90      }
  91  
  92      //pre_dump( setlocale( LC_CTYPE, 0 ) );
  93  
  94      // Transliterate the string:
  95      $newstr = iconv( $evo_charset, 'ASCII//TRANSLIT', $str );
  96  
  97      // Restore the original locale:
  98      setlocale( LC_CTYPE, $orig_lc_ctype );
  99  
 100      return $newstr;
 101  }
 102  
 103  
 104  /**
 105   * Convert special chars (like german umlauts) to ASCII characters.
 106   *
 107   * @param string Input string to operate on
 108   * @param NULL|string The post locale or NULL if there is no specific locale.
 109   *                    Gets passed to evo_iconv_transliterate().
 110   * @return string The input string with replaced chars.
 111   */
 112  function replace_special_chars( $str, $post_locale = NULL )
 113  {
 114      global $evo_charset, $default_locale, $current_locale, $locales;
 115  
 116      // Decode entities to be able to transliterate the associated chars:
 117      // Tblue> TODO: Check if this could have side effects.
 118      $str = html_entity_decode( $str, ENT_NOQUOTES, $evo_charset );
 119  
 120      $our_locale = $post_locale;
 121      if( $our_locale === NULL )
 122      {    // post locale is not set, try to guess current locale
 123          if( !empty($default_locale) )
 124          {
 125              $our_locale = $default_locale;
 126          }
 127          if( !empty($current_locale) )
 128          {    // Override with current locale if available
 129              $our_locale = $current_locale;
 130          }
 131      }
 132      if( $our_locale !== NULL && isset($locales[$our_locale]) && !empty($locales[$our_locale]['transliteration_map']) )
 133      {    // Use locale 'transliteration_map' if present
 134          if( ! array_key_exists( '', $locales[$our_locale]['transliteration_map'] ) )
 135          {    // Make sure there's no empty string key, otherwise strtr() returns false
 136              if( $tmp_str = strtr( $str, $locales[$our_locale]['transliteration_map'] ) );
 137              {    // Use newly transliterated string
 138                  $str = $tmp_str;
 139              }
 140          }
 141      }
 142  
 143      if( ( $newstr = evo_iconv_transliterate( $str, $post_locale ) ) !== false )
 144      {    // iconv allows us to get nice URL titles by transliterating non-ASCII chars.
 145          // Tblue> htmlentities() does not know anything about ASCII?! ISO-8859-1 will work too, though.
 146          $newstr_charset = 'ISO-8859-1';
 147      }
 148      // TODO: sam2kb> convert this to 'transliteration_map'
 149      else if( can_convert_charsets('UTF-8', $evo_charset) && can_convert_charsets('UTF-8', 'ISO-8859-1') /* source */ )
 150      {    // Fallback to the limited old method: Transliterate only a few known chars.
 151          $newstr = convert_charset( $str, 'UTF-8', $evo_charset );
 152          $newstr_charset = 'UTF-8';
 153  
 154          $search = array( '', '', '', '', '', '', '', '', '', '', '', '', '', '', '' ); // iso-8859-1
 155          $replace = array( 'Ae', 'ae', 'Oe', 'oe', 'Ue', 'ue', 'ss', 'a', 'c', 'e', 'e', 'i', 'o', 'o', 'u' );
 156  
 157          foreach( $search as $k => $v )
 158          { // convert $search to UTF-8
 159              $search[$k] = convert_charset( $v, 'UTF-8', 'ISO-8859-1' );
 160          }
 161  
 162          $newstr = str_replace( $search, $replace, $newstr );
 163      }
 164      else
 165      {
 166          // Replace HTML entities only.
 167          $newstr = $str;
 168          $newstr_charset = $evo_charset;
 169      }
 170  
 171      // Replace HTML entities
 172      $newstr = htmlentities( $newstr, ENT_NOQUOTES, $newstr_charset );
 173  
 174      // Handle special entities (e.g., use "-" instead of "a" for "&"):
 175      $newstr = str_replace(
 176          array( '&amp;', '&laquo;', '&raquo;' ),
 177          '-',
 178          $newstr );
 179  
 180  
 181      // Keep only one char in entities!
 182      $newstr = preg_replace( '/&(.).+?;/', '$1', $newstr );
 183      // Replace non acceptable chars
 184      $newstr = preg_replace( '/[^A-Za-z0-9_]+/', '-', $newstr );
 185      // Remove '-' at start and end:
 186      $newstr = preg_replace( '/^-+/', '', $newstr );
 187      $newstr = preg_replace( '/-+$/', '', $newstr );
 188  
 189      //pre_dump( $str, $newstr );
 190  
 191      return $newstr;
 192  }
 193  
 194  ?>

title

Description

title

Description

title

Description

title

title

Body