[ Index ]

PHP Cross Reference of Web Application Component Toolkit

title

Body

[close]

/framework/template/compiler/ -> expressionlexer.inc.php (source)

   1  <?php
   2  //--------------------------------------------------------------------------------
   3  // Copyright 2003 Procata, Inc.
   4  // Released under the LGPL license (http://www.gnu.org/copyleft/lesser.html)
   5  //--------------------------------------------------------------------------------
   6  /**
   7  * Author Marcus Baker: http://www.lastcraft.com
   8  * Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/
   9  * @author Marcus Baker
  10  * @package WACT_TEMPLATE
  11  * @version $Id: expressionlexer.inc.php,v 1.2 2004/07/03 22:01:38 harryf Exp $
  12  */
  13  /**#@+
  14   * lexer mode constant
  15   */
  16  define("EXPRESSION_LEXER_ENTER", 1);
  17  define("EXPRESSION_LEXER_MATCHED", 2);
  18  define("EXPRESSION_LEXER_UNMATCHED", 3);
  19  define("EXPRESSION_LEXER_EXIT", 4);
  20  define("EXPRESSION_LEXER_SPECIAL", 5);
  21  /**#@-*/
  22  
  23  /**
  24   *    Compounded regular expression. Any of
  25   *    the contained patterns could match and
  26   *    when one does it's label is returned.
  27   *    @package WACT_TEMPLATE
  28   */
  29  class ExpressionLexerParallelRegex {
  30      var $_patterns;
  31      var $_labels;
  32      var $_regex;
  33      var $_case;
  34      
  35      /**
  36       *    Constructor. Starts with no patterns.
  37       *    @param boolean $case    True for case sensitive, false
  38       *                            for insensitive.
  39       *    @access public
  40       */
  41  	function ExpressionLexerParallelRegex($case) {
  42          $this->_case = $case;
  43          $this->_patterns = array();
  44          $this->_labels = array();
  45          $this->_regex = null;
  46      }
  47      
  48      /**
  49       *    Adds a pattern with an optional label.
  50       *    @param string $pattern      Perl style regex, but ( and )
  51       *                                lose the usual meaning.
  52       *    @param string $label        Label of regex to be returned
  53       *                                on a match.
  54       *    @access public
  55       */
  56  	function addPattern($pattern, $label = true) {
  57          $count = count($this->_patterns);
  58          $this->_patterns[$count] = $pattern;
  59          $this->_labels[$count] = $label;
  60          $this->_regex = null;
  61      }
  62      
  63      /**
  64       *    Attempts to match all patterns at once against
  65       *    a string.
  66       *    @param string $subject      String to match against.
  67       *    @param string $match        First matched portion of
  68       *                                subject.
  69       *    @return boolean             True on success.
  70       *    @access public
  71       */
  72  	function match($subject, &$match) {
  73          if (count($this->_patterns) == 0) {
  74              return false;
  75          }
  76          if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
  77              $match = "";
  78              return false;
  79          }
  80          $match = $matches[0];
  81          for ($i = 1; $i < count($matches); $i++) {
  82              if ($matches[$i]) {
  83                  return $this->_labels[$i - 1];
  84              }
  85          }
  86          return true;
  87      }
  88      
  89      /**
  90       *    Compounds the patterns into a single
  91       *    regular expression separated with the
  92       *    "or" operator. Caches the regex.
  93       *    Will automatically escape (, ) and / tokens.
  94       *    @param array $patterns    List of patterns in order.
  95       *    @access private
  96       */
  97  	function _getCompoundedRegex() {
  98          if ($this->_regex == null) {
  99              for ($i = 0; $i < count($this->_patterns); $i++) {
 100                  $this->_patterns[$i] = '(' . str_replace(
 101                          array('/', '(', ')'),
 102                          array('\/', '\(', '\)'),
 103                          $this->_patterns[$i]) . ')';
 104              }
 105              $this->_regex = "/" . implode("|", $this->_patterns) . "/" . $this->_getPerlMatchingFlags();
 106          }
 107          return $this->_regex;
 108      }
 109      
 110      /**
 111       *    Accessor for perl regex mode flags to use.
 112       *    @return string       Perl regex flags.
 113       *    @access private
 114       */
 115  	function _getPerlMatchingFlags() {
 116          return ($this->_case ? "msS" : "msSi");
 117      }
 118  }
 119  
 120  /**
 121   *    States for a stack machine.
 122   *    @package WACT_TEMPLATE
 123    */
 124  class ExpressionLexerStateStack {
 125      var $_stack;
 126      
 127      /**
 128       *    Constructor. Starts in named state.
 129       *    @param string $start        Starting state name.
 130       *    @access public
 131       */
 132  	function ExpressionLexerStateStack($start) {
 133          $this->_stack = array($start);
 134      }
 135      
 136      /**
 137       *    Accessor for current state.
 138       *    @return string       State.
 139       *    @access public
 140       */
 141  	function getCurrent() {
 142          return $this->_stack[count($this->_stack) - 1];
 143      }
 144      
 145      /**
 146       *    Adds a state to the stack and sets it
 147       *    to be the current state.
 148       *    @param string $state        New state.
 149       *    @access public
 150       */
 151  	function enter($state) {
 152          array_push($this->_stack, $state);
 153      }
 154      
 155      /**
 156       *    Leaves the current state and reverts
 157       *    to the previous one.
 158       *    @return boolean    False if we drop off
 159       *                       the bottom of the list.
 160       *    @access public
 161       */
 162  	function leave() {
 163          if (count($this->_stack) == 1) {
 164              return false;
 165          }
 166          array_pop($this->_stack);
 167          return true;
 168      }
 169  }
 170  
 171  /**
 172   *    Accepts text and breaks it into tokens.
 173   *    Some optimisation to make the sure the
 174   *    content is only scanned by the PHP regex
 175   *    parser once. Lexer modes must not start
 176   *    with leading underscores.
 177   *    @package WACT_TEMPLATE
 178   */
 179  class ExpressionLexer {
 180      var $_regexes;
 181      var $_parser;
 182      var $_mode;
 183      var $_mode_handlers;
 184      var $_case;
 185      
 186      /**
 187       *    Sets up the lexer in case insensitive matching
 188       *    by default.
 189       *    @param ExpressionParser $parser  Handling strategy by
 190       *                                    reference.
 191       *    @param string $start            Starting handler.
 192       *    @param boolean $case            True for case sensitive.
 193       *    @access public
 194       */
 195  	function ExpressionLexer(&$parser, $start = "accept", $case = false) {
 196          $this->_case = $case;
 197          $this->_regexes = array();
 198          $this->_parser = &$parser;
 199          $this->_mode = &new ExpressionLexerStateStack($start);
 200          $this->_mode_handlers = array();
 201      }
 202      
 203      /**
 204       *    Adds a token search pattern for a particular
 205       *    parsing mode. The pattern does not change the
 206       *    current mode.
 207       *    @param string $pattern      Perl style regex, but ( and )
 208       *                                lose the usual meaning.
 209       *    @param string $mode         Should only apply this
 210       *                                pattern when dealing with
 211       *                                this type of input.
 212       *    @access public
 213       */
 214  	function addPattern($pattern, $mode = "accept") {
 215          if (! isset($this->_regexes[$mode])) {
 216              $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
 217          }
 218          $this->_regexes[$mode]->addPattern($pattern);
 219      }
 220      
 221      /**
 222       *    Adds a pattern that will enter a new parsing
 223       *    mode. Useful for entering parenthesis, strings,
 224       *    tags, etc.
 225       *    @param string $pattern      Perl style regex, but ( and )
 226       *                                lose the usual meaning.
 227       *    @param string $mode         Should only apply this
 228       *                                pattern when dealing with
 229       *                                this type of input.
 230       *    @param string $new_mode     Change parsing to this new
 231       *                                nested mode.
 232       *    @access public
 233       */
 234  	function addEntryPattern($pattern, $mode, $new_mode) {
 235          if (! isset($this->_regexes[$mode])) {
 236              $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
 237          }
 238          $this->_regexes[$mode]->addPattern($pattern, $new_mode);
 239      }
 240      
 241      /**
 242       *    Adds a pattern that will exit the current mode
 243       *    and re-enter the previous one.
 244       *    @param string $pattern      Perl style regex, but ( and )
 245       *                                lose the usual meaning.
 246       *    @param string $mode         Mode to leave.
 247       *    @access public
 248       */
 249  	function addExitPattern($pattern, $mode) {
 250          if (! isset($this->_regexes[$mode])) {
 251              $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
 252          }
 253          $this->_regexes[$mode]->addPattern($pattern, "__exit");
 254      }
 255      
 256      /**
 257       *    Adds a pattern that has a special mode. Acts as an entry
 258       *    and exit pattern in one go, effectively calling a special
 259       *    parser handler for this token only.
 260       *    @param string $pattern      Perl style regex, but ( and )
 261       *                                lose the usual meaning.
 262       *    @param string $mode         Should only apply this
 263       *                                pattern when dealing with
 264       *                                this type of input.
 265       *    @param string $special      Use this mode for this one token.
 266       *    @access public
 267       */
 268  	function addSpecialPattern($pattern, $mode, $special) {
 269          if (! isset($this->_regexes[$mode])) {
 270              $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
 271          }
 272          $this->_regexes[$mode]->addPattern($pattern, "_$special");
 273      }
 274      
 275      /**
 276       *    Adds a mapping from a mode to another handler.
 277       *    @param string $mode        Mode to be remapped.
 278       *    @param string $handler     New target handler.
 279       *    @access public
 280       */
 281  	function mapHandler($mode, $handler) {
 282          $this->_mode_handlers[$mode] = $handler;
 283      }
 284      
 285      /**
 286       *    Splits the page text into tokens. Will fail
 287       *    if the handlers report an error or if no
 288       *    content is consumed. If successful then each
 289       *    unparsed and parsed token invokes a call to the
 290       *    held listener.
 291       *    @param string $raw        Raw HTML text.
 292       *    @return boolean           True on success, else false.
 293       *    @access public
 294       */
 295  	function parse($raw) {
 296          if (! isset($this->_parser)) {
 297              return false;
 298          }
 299          $length = strlen($raw);
 300          while (is_array($parsed = $this->_reduce($raw))) {
 301              list($unmatched, $matched, $mode) = $parsed;
 302              if (! $this->_dispatchTokens($unmatched, $matched, $mode)) {
 303                  return false;
 304              }
 305              if (strlen($raw) == $length) {
 306                  return false;
 307              }
 308              $length = strlen($raw);
 309          }
 310          if (!$parsed) {
 311              return false;
 312          }
 313          return $this->_invokeParser($raw, EXPRESSION_LEXER_UNMATCHED);
 314      }
 315      
 316      /**
 317       *    Sends the matched token and any leading unmatched
 318       *    text to the parser changing the lexer to a new
 319       *    mode if one is listed.
 320       *    @param string $unmatched    Unmatched leading portion.
 321       *    @param string $matched      Actual token match.
 322       *    @param string $mode         Mode after match. A boolean
 323       *                                false mode causes no change.
 324       *    @return boolean             False if there was any error
 325       *                                from the parser.
 326       *    @access private
 327       */
 328  	function _dispatchTokens($unmatched, $matched, $mode = false) {
 329          if (! $this->_invokeParser($unmatched, EXPRESSION_LEXER_UNMATCHED)) {
 330              return false;
 331          }
 332          if ($this->_isModeEnd($mode)) {
 333              if (! $this->_invokeParser($matched, EXPRESSION_LEXER_EXIT)) {
 334                  return false;
 335              }
 336              return $this->_mode->leave();
 337          }
 338          if ($this->_isSpecialMode($mode)) {
 339              $this->_mode->enter($this->_decodeSpecial($mode));
 340              if (! $this->_invokeParser($matched, EXPRESSION_LEXER_SPECIAL)) {
 341                  return false;
 342              }
 343              return $this->_mode->leave();
 344          }
 345          if (is_string($mode)) {
 346              $this->_mode->enter($mode);
 347              return $this->_invokeParser($matched, EXPRESSION_LEXER_ENTER);
 348          }
 349          return $this->_invokeParser($matched, EXPRESSION_LEXER_MATCHED);
 350      }
 351      
 352      /**
 353       *    Tests to see if the new mode is actually to leave
 354       *    the current mode and pop an item from the matching
 355       *    mode stack.
 356       *    @param string $mode    Mode to test.
 357       *    @return boolean        True if this is the exit mode.
 358       *    @access private
 359       */
 360  	function _isModeEnd($mode) {
 361          return ($mode === "__exit");
 362      }
 363      
 364      /**
 365       *    Test to see if the mode is one where this mode
 366       *    is entered for this token only and automatically
 367       *    leaves immediately afterwoods.
 368       *    @param string $mode    Mode to test.
 369       *    @return boolean        True if this is the exit mode.
 370       *    @access private
 371       */
 372  	function _isSpecialMode($mode) {
 373          return (strncmp($mode, "_", 1) == 0);
 374      }
 375      
 376      /**
 377       *    Strips the magic underscore marking single token
 378       *    modes.
 379       *    @param string $mode    Mode to decode.
 380       *    @return string         Underlying mode name.
 381       *    @access private
 382       */
 383  	function _decodeSpecial($mode) {
 384          return substr($mode, 1);
 385      }
 386      
 387      /**
 388       *    Calls the parser method named after the current
 389       *    mode. Empty content will be ignored. The lexer
 390       *    has a parser handler for each mode in the lexer.
 391       *    @param string $content        Text parsed.
 392       *    @param boolean $is_match      Token is recognised rather
 393       *                                  than unparsed data.
 394       *    @access private
 395       */
 396  	function _invokeParser($content, $is_match) {
 397          if (($content === "") || ($content === false)) {
 398              return true;
 399          }
 400          $handler = $this->_mode->getCurrent();
 401          if (isset($this->_mode_handlers[$handler])) {
 402              $handler = $this->_mode_handlers[$handler];
 403          }
 404          return $this->_parser->$handler($content, $is_match);
 405      }
 406      
 407      /**
 408       *    Tries to match a chunk of text and if successful
 409       *    removes the recognised chunk and any leading
 410       *    unparsed data. Empty strings will not be matched.
 411       *    @param string $raw         The subject to parse. This is the
 412       *                               content that will be eaten.
 413       *    @return array              Three item list of unparsed
 414       *                               content followed by the
 415       *                               recognised token and finally the
 416       *                               action the parser is to take.
 417       *                               True if no match, false if there
 418       *                               is a parsing error.
 419       *    @access private
 420       */
 421  	function _reduce(&$raw) {
 422          if (! isset($this->_regexes[$this->_mode->getCurrent()])) {
 423              return false;
 424          }
 425          if ($raw === "") {
 426              return true;
 427          }
 428          if ($action = $this->_regexes[$this->_mode->getCurrent()]->match($raw, $match)) {
 429              $unparsed_character_count = strpos($raw, $match);
 430              $unparsed = substr($raw, 0, $unparsed_character_count);
 431              $raw = substr($raw, $unparsed_character_count + strlen($match));
 432              return array($unparsed, $match, $action);
 433          }
 434          return true;
 435      }
 436  }
 437  ?>


Generated: Sun Nov 28 19:36:09 2004 Cross-referenced by PHPXref 0.5