b2evolution PHP Cross Reference Blogging Systems

Source: /inc/_ext/_url_rel2abs.php - 439 lines - 14897 bytes - Summary - Text - Print

Description: Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

   1  <?php
   2  /**

   3   * Copyright (c) 2008, David R. Nadeau, NadeauSoftware.com.

   4   * All rights reserved.

   5   *

   6   * Redistribution and use in source and binary forms, with or without

   7   * modification, are permitted provided that the following conditions

   8   * are met:

   9   *

  10   *    * Redistributions of source code must retain the above copyright

  11   *      notice, this list of conditions and the following disclaimer.

  12   *

  13   *    * Redistributions in binary form must reproduce the above

  14   *      copyright notice, this list of conditions and the following

  15   *      disclaimer in the documentation and/or other materials provided

  16   *      with the distribution.

  17   *

  18   *    * Neither the names of David R. Nadeau or NadeauSoftware.com, nor

  19   *      the names of its contributors may be used to endorse or promote

  20   *      products derived from this software without specific prior

  21   *      written permission.

  22   *

  23   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

  24   * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

  25   * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS

  26   * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

  27   * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

  28   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,

  29   * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

  30   * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER

  31   * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

  32   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY

  33   * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY

  34   * OF SUCH DAMAGE.

  35   */
  36  
  37  /*

  38   * This is a BSD License approved by the Open Source Initiative (OSI).

  39   * See:  http://www.opensource.org/licenses/bsd-license.php

  40   */
  41  
  42  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
  43  
  44  /**

  45   * Combine a base URL and a relative URL to produce a new

  46   * absolute URL.  The base URL is often the URL of a page,

  47   * and the relative URL is a URL embedded on that page.

  48   *

  49   * This function implements the "absolutize" algorithm from

  50   * the RFC3986 specification for URLs.

  51   *

  52   * This function supports multi-byte characters with the UTF-8 encoding,

  53   * per the URL specification.

  54   *

  55   * Parameters:

  56   * relativeUrl - the relative URL to convert

  57   * baseUrl - the absolute base URL

  58   *

  59   * Return values:

  60   *     An absolute URL that combines parts of the base and relative

  61   *     URLs, or FALSE if the base URL is not absolute or if either

  62   *     URL cannot be parsed.

  63   */
  function url_to_absolute( $relativeUrl, $baseUrl )
  {
      // Resolve $relativeUrl against $baseUrl (RFC3986 reference resolution).
      // Returns the absolute URL string, or FALSE when either URL cannot be
      // parsed or the base URL lacks a scheme or host.
      $r = split_url( $relativeUrl );
      if ( $r === FALSE )
          return FALSE;
      // Guard against a parser that yields a non-array for an empty URL.
      if ( !is_array( $r ) )
          $r = array( );

      // Compare against '' rather than using empty(): empty("0") is TRUE in
      // PHP, which would make a relative reference of "0" look like "no path".
      $hasPath = isset( $r['path'] ) && $r['path'] !== '';

      // A reference that carries its own scheme is already absolute:
      // normalise an absolute path and return it.
      if ( !empty( $r['scheme'] ) )
      {
          if ( $hasPath && $r['path'][0] === '/' )
              $r['path'] = url_remove_dot_segments( $r['path'] );
          return join_url( $r );
      }

      // The base must be absolute (scheme and host) to resolve against.
      $b = split_url( $baseUrl );
      if ( !is_array( $b ) || empty( $b['scheme'] ) || empty( $b['host'] ) )
          return FALSE;
      $r['scheme'] = $b['scheme'];

      // Network-path reference ("//host/..."): keep its own authority.
      if ( isset( $r['host'] ) )
      {
          if ( $hasPath )
              $r['path'] = url_remove_dot_segments( $r['path'] );
          return join_url( $r );
      }

      // Otherwise the authority comes entirely from the base URL.
      unset( $r['port'], $r['user'], $r['pass'] );
      $r['host'] = $b['host'];
      foreach ( array( 'port', 'user', 'pass' ) as $key )
      {
          if ( isset( $b[$key] ) )
              $r[$key] = $b[$key];
      }

      // No path in the reference: reuse the base path, and the base query
      // unless the reference supplies its own.
      if ( !$hasPath )
      {
          if ( isset( $b['path'] ) && $b['path'] !== '' )
              $r['path'] = $b['path'];
          if ( !isset( $r['query'] ) && isset( $b['query'] ) )
              $r['query'] = $b['query'];
          return join_url( $r );
      }

      // Relative path: replace everything after the last "/" of the base
      // path.  "/" is a single byte that never occurs inside a UTF-8
      // multi-byte sequence, so byte-wise string functions are safe here.
      if ( $r['path'][0] !== '/' )
      {
          $basePath = isset( $b['path'] ) ? $b['path'] : '';
          $cut      = strrpos( $basePath, '/' );
          $baseDir  = ( $cut === FALSE ) ? '' : substr( $basePath, 0, $cut );
          $r['path'] = $baseDir . '/' . $r['path'];
      }
      $r['path'] = url_remove_dot_segments( $r['path'] );
      return join_url( $r );
  }
 121  
 122  /**

 123   * Filter out "." and ".." segments from a URL's path and return

 124   * the result.

 125   *

 126   * This function implements the "remove_dot_segments" algorithm from

 127   * the RFC3986 specification for URLs.

 128   *

 129   * This function supports multi-byte characters with the UTF-8 encoding,

 130   * per the URL specification.

 131   *

 132   * Parameters:

 133   *     path    the path to filter

 134   *

 135   * Return values:

 136   *     The filtered path with "." and ".." removed.

 137   */
 function url_remove_dot_segments( $path )
 {
     // Collapse "." and ".." segments (and empty segments) out of $path and
     // return the cleaned path.  A leading "/" is preserved, and the result
     // keeps a trailing "/" when the input names a directory, i.e. when it
     // ends in "/", "/." or "/.." (RFC3986 section 5.2.4 maps "/b/c/.." to
     // "/b/", not "/b").
     $path = (string) $path;
     if ( $path === '' )
         return '';

     // "/" is a single byte that never appears inside a UTF-8 multi-byte
     // sequence, so a plain byte-wise explode is safe for UTF-8 paths and
     // does not depend on the mbstring extension.
     $outSegs = array( );
     foreach ( explode( '/', $path ) as $seg )
     {
         if ( $seg === '' || $seg === '.' )
             continue;
         if ( $seg === '..' )
             array_pop( $outSegs );    // ".." above the root is silently dropped
         else
             $outSegs[] = $seg;
     }

     $outPath = implode( '/', $outSegs );
     if ( $path[0] === '/' )
         $outPath = '/' . $outPath;

     $namesDirectory = substr( $path, -1 ) === '/'
         || substr( $path, -2 ) === '/.'
         || substr( $path, -3 ) === '/..';
     if ( $outPath !== '/' && $namesDirectory )
         $outPath .= '/';
     return $outPath;
 }
 161  
 162  
 163  /**

 164   * This function parses an absolute or relative URL and splits it

 165   * into individual components.

 166   *

 167   * RFC3986 specifies the components of a Uniform Resource Identifier (URI).

 168   * A portion of the ABNFs are repeated here:

 169   *

 170   *    URI-reference    = URI

 171   *            / relative-ref

 172   *

 173   *    URI        = scheme ":" hier-part [ "?" query ] [ "#" fragment ]

 174   *

 175   *    relative-ref    = relative-part [ "?" query ] [ "#" fragment ]

 176   *

 177   *    hier-part    = "//" authority path-abempty

 178   *            / path-absolute

 179   *            / path-rootless

 180   *            / path-empty

 181   *

 182   *    relative-part    = "//" authority path-abempty

 183   *            / path-absolute

 184   *            / path-noscheme

 185   *            / path-empty

 186   *

 187   *    authority    = [ userinfo "@" ] host [ ":" port ]

 188   *

 189   * So, a URL has the following major components:

 190   *

 191   *    scheme

 192   *        The name of a method used to interpret the rest of

 193   *        the URL.  Examples:  "http", "https", "mailto", "file".

 194   *

 195   *    authority

 196   *        The name of the authority governing the URL's name

 197   *        space.  Examples:  "example.com", "user@example.com",

 198   *        "example.com:80", "user:password@example.com:80".

 199   *

 200   *        The authority may include a host name, port number,

 201   *        user name, and password.

 202   *

 203   *        The host may be a name, an IPv4 numeric address, or

 204   *        an IPv6 numeric address.

 205   *

 206   *    path

 207   *        The hierarchical path to the URL's resource.

 208   *        Examples:  "/index.htm", "/scripts/page.php".

 209   *

 210   *    query

 211   *        The data for a query.  Examples:  "?search=google.com".

 212   *

 213   *    fragment

 214   *        The name of a secondary resource relative to that named

 215   *        by the path.  Examples:  "#section1", "#header".

 216   *

 217   * An "absolute" URL must include a scheme and path.  The authority, query,

 218   * and fragment components are optional.

 219   *

 220   * A "relative" URL does not include a scheme and must include a path.  The

 221   * authority, query, and fragment components are optional.

 222   *

 223   * This function splits the $url argument into the following components

 224   * and returns them in an associative array.  Keys to that array include:

 225   *

 226   *    "scheme"    The scheme, such as "http".

 227   *    "host"        The host name, IPv4, or IPv6 address.

 228   *    "port"        The port number.

 229   *    "user"        The user name.

 230   *    "pass"        The user password.

 231   *    "path"        The path, such as a file path for "http".

 232   *    "query"        The query.

 233   *    "fragment"    The fragment.

 234   *

 235   * One or more of these may not be present, depending upon the URL.

 236   *

 237   * Optionally, the "user", "pass", "host" (if a name, not an IP address),

 238   * "path", "query", and "fragment" may have percent-encoded characters

 239   * decoded.  The "scheme" and "port" cannot include percent-encoded

 240   * characters and are never decoded.  Decoding occurs after the URL has

 241   * been parsed.

 242   *

 243   * Parameters:

 244   *     url        the URL to parse.

 245   *

 246   *     decode        an optional boolean flag selecting whether

 247   *             to decode percent encoding or not.  Default = FALSE.

 248   *

 249   * Return values:

 250   *     the associative array of URL parts, or FALSE if the URL is

 251   *     too malformed to recognize any parts.

 252   */
 function split_url( $url, $decode=FALSE)
 {
     // Split an absolute or relative URL into its components and return them
     // as an associative array with any of the keys "scheme", "user", "pass",
     // "host", "port", "path", "query" and "fragment".  Returns FALSE when
     // the URL does not match the grammar below.  When $decode is TRUE the
     // percent-encoding of every component except scheme and port is decoded
     // after parsing.

     // Character sets from RFC3986.
     $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
     $xpchar        = $xunressub . ':@%';

     // Scheme from RFC3986.  The hyphen is escaped and placed last so that
     // "+-." is not read as a character range (which would also admit ",").
     $xscheme       = '([a-zA-Z][a-zA-Z\d+.\-]*)';

     // User info (user + password) from RFC3986.
     $xuserinfo     = '((['  . $xunressub . '%]*)' .
                      '(:([' . $xunressub . ':%]*))?)';

     // IPv4 from RFC3986 (without digit constraints).
     $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

     // IPv6 from RFC2732 (without digit and grouping constraints).
     $xipv6         = '(\[([a-fA-F\d.:]+)\])';

     // Host name from RFC1035, relaxed: validation is left to the caller.
     // The hyphen must be escaped: PCRE2 (PHP 7.3 and later) rejects an
     // unescaped "-" directly after "\d" as an invalid range, which would
     // make every call fail.
     $xhost_name    = '([a-zA-Z\d.%\-]+)';

     // Authority from RFC3986.  Skip IP future.
     $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
     $xport         = '(\d*)';
     $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                      '?(:' . $xport . ')?)';

     // Path from RFC3986.  Blend absolute & relative for efficiency.
     $xslash_seg    = '(/[' . $xpchar . ']*)';
     $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
     $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
     $xpath_abs     = '(/(' . $xpath_rel . ')?)';
     $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                      '|' . $xpath_rel . ')';

     // Query and fragment from RFC3986.
     $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

     // URL.
     $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                      '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

     // Split the URL into components.
     if ( !preg_match( '!' . $xurl . '!', $url, $m ) )
         return FALSE;

     // Always return an array, even when no component was recognised.
     $parts = array( );

     if ( isset( $m[2] ) && $m[2] !== '' )
         $parts['scheme'] = strtolower( $m[2] );

     // Groups 7 and 10 include their "@" / ":" delimiter, so they are
     // non-empty whenever user info / a password is present at all.
     if ( !empty( $m[7] ) )
         $parts['user'] = isset( $m[9] ) ? $m[9] : '';
     if ( !empty( $m[10] ) )
         $parts['pass'] = $m[11];

     // Host: name, IPv4 or IPv6 (brackets stripped).  Captures are compared
     // against '' instead of using empty() so that a host of "0" is kept.
     $hostIsName = FALSE;
     if ( isset( $m[13] ) && $m[13] !== '' )
     {
         $parts['host'] = $m[13];
         $hostIsName    = TRUE;
     }
     else if ( isset( $m[14] ) && $m[14] !== '' )
         $parts['host'] = $m[14];
     else if ( isset( $m[16] ) && $m[16] !== '' )
         $parts['host'] = $m[16];
     else if ( !empty( $m[5] ) )
         $parts['host'] = '';        // "//" present but no host, e.g. file:///
     if ( !empty( $m[17] ) )
         $parts['port'] = $m[18];

     // Path: the first of the three path alternatives that captured text.
     // A path of "0" is valid and must not be dropped.
     foreach ( array( 19, 21, 25 ) as $group )
     {
         if ( isset( $m[$group] ) && $m[$group] !== '' )
         {
             $parts['path'] = $m[$group];
             break;
         }
     }

     if ( !empty( $m[27] ) )
         $parts['query'] = $m[28];
     if ( !empty( $m[29] ) )
         $parts['fragment'] = $m[30];

     if ( !$decode )
         return $parts;

     // Decode percent-encoding; scheme and port are never decoded, and the
     // host only when it was parsed through the host-name alternative.
     foreach ( array( 'user', 'pass', 'path', 'query', 'fragment' ) as $key )
     {
         if ( isset( $parts[$key] ) )
             $parts[$key] = rawurldecode( $parts[$key] );
     }
     if ( $hostIsName )
         $parts['host'] = rawurldecode( $parts['host'] );
     return $parts;
 }
 340  
 341  
 342  /**

 343   * This function joins together URL components to form a complete URL.

 344   *

 345   * RFC3986 specifies the components of a Uniform Resource Identifier (URI).

 346   * This function implements the specification's "component recomposition"

 347   * algorithm for combining URI components into a full URI string.

 348   *

 349   * The $parts argument is an associative array containing zero or

 350   * more of the following:

 351   *

 352   *    "scheme"    The scheme, such as "http".

 353   *    "host"        The host name, IPv4, or IPv6 address.

 354   *    "port"        The port number.

 355   *    "user"        The user name.

 356   *    "pass"        The user password.

 357   *    "path"        The path, such as a file path for "http".

 358   *    "query"        The query.

 359   *    "fragment"    The fragment.

 360   *

 361   * The "port", "user", and "pass" values are only used when a "host"

 362   * is present.

 363   *

 364   * The optional $encode argument indicates if appropriate URL components

 365   * should be percent-encoded as they are assembled into the URL.  Encoding

 366   * is only applied to the "user", "pass", "host" (if a host name, not an

 367   * IP address), "path", "query", and "fragment" components.  The "scheme"

 368   * and "port" are never encoded.  When a "scheme" and "host" are both

 369   * present, the "path" is presumed to be hierarchical and encoding

 370   * processes each segment of the hierarchy separately (i.e., the slashes

 371   * are left alone).

 372   *

 373   * The assembled URL string is returned.

 374   *

 375   * Parameters:

 376   *     parts        an associative array of strings containing the

 377   *             individual parts of a URL.

 378   *

 379   *     encode        an optional boolean flag selecting whether

 380   *             to do percent encoding or not.  Default = FALSE.

 381   *

 382   * Return values:

 383   *     Returns the assembled URL string.  The string is an absolute

 384   *     URL if a scheme is supplied, and a relative URL if not.  An

 385   *     empty string is returned if the $parts array does not contain

 386   *     any of the needed values.

 387   */
 function join_url( $parts, $encode=FALSE)
 {
     // Assemble the URL components in $parts ("scheme", "user", "pass",
     // "host", "port", "path", "query", "fragment") into a URL string.
     // User, password and port are only emitted when a host is present.
     // When $encode is TRUE, every component except scheme and port is
     // percent-encoded first.  Returns '' when $parts holds nothing usable.

     // Compare against '' rather than using empty(): a path of "0" is valid
     // but empty("0") is TRUE in PHP.
     $hasPath = isset( $parts['path'] ) && $parts['path'] !== '';

     if ( $encode )
     {
         if ( isset( $parts['user'] ) )
             $parts['user']     = rawurlencode( $parts['user'] );
         if ( isset( $parts['pass'] ) )
             $parts['pass']     = rawurlencode( $parts['pass'] );
         // Leave IP literals alone (bare, or IPv6 in brackets); encode host
         // names.  Both alternatives sit inside one group so that "^" and
         // "$" anchor the whole host rather than one alternative each.
         if ( isset( $parts['host'] ) &&
             !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'] ) )
             $parts['host']     = rawurlencode( $parts['host'] );
         // Encode each path segment but keep the "/" separators.
         if ( $hasPath )
             $parts['path']     = str_replace( '%2F', '/',
                 rawurlencode( $parts['path'] ) );
         if ( isset( $parts['query'] ) )
             $parts['query']    = rawurlencode( $parts['query'] );
         if ( isset( $parts['fragment'] ) )
             $parts['fragment'] = rawurlencode( $parts['fragment'] );
     }

     $url = '';
     if ( !empty( $parts['scheme'] ) )
         $url .= $parts['scheme'] . ':';

     if ( isset( $parts['host'] ) )
     {
         $url .= '//';
         if ( isset( $parts['user'] ) )
         {
             $url .= $parts['user'];
             if ( isset( $parts['pass'] ) )
                 $url .= ':' . $parts['pass'];
             $url .= '@';
         }
         // A bare IPv6 address needs brackets so that its colons are not
         // mistaken for the port separator.
         if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) )
             $url .= '[' . $parts['host'] . ']';    // IPv6
         else
             $url .= $parts['host'];                // IPv4 or name
         if ( isset( $parts['port'] ) )
             $url .= ':' . $parts['port'];
         // With an authority present the path must begin with "/".
         if ( $hasPath && $parts['path'][0] !== '/' )
             $url .= '/';
     }

     if ( $hasPath )
         $url .= $parts['path'];
     if ( isset( $parts['query'] ) )
         $url .= '?' . $parts['query'];
     if ( isset( $parts['fragment'] ) )
         $url .= '#' . $parts['fragment'];
     return $url;
 }
 438  
 439  ?>

title

Description

title

Description

title

Description

title

title

Body