| [ Index ] |
PHP Cross Reference of Web Application Component Toolkit |
[Summary view] [Print] [Text view]
<?php
//--------------------------------------------------------------------------------
// Copyright 2003 Procata, Inc.
// Released under the LGPL license (http://www.gnu.org/copyleft/lesser.html)
//--------------------------------------------------------------------------------
/**
* Author Marcus Baker: http://www.lastcraft.com
* Version adapted from Simple Test: http://sourceforge.net/projects/simpletest/
* @author Marcus Baker
* @package WACT_TEMPLATE
* @version $Id: expressionlexer.inc.php,v 1.2 2004/07/03 22:01:38 harryf Exp $
*/
/**#@+
* Lexer event constant. One of these is passed as the second argument
* of every parser handler call so the handler can tell why it was invoked.
*/
define("EXPRESSION_LEXER_ENTER", 1);
define("EXPRESSION_LEXER_MATCHED", 2);
define("EXPRESSION_LEXER_UNMATCHED", 3);
define("EXPRESSION_LEXER_EXIT", 4);
define("EXPRESSION_LEXER_SPECIAL", 5);
/**#@-*/

/**
* Compounded regular expression. Any of the contained patterns
* could match and when one does its label is returned.
* @package WACT_TEMPLATE
*/
class ExpressionLexerParallelRegex {
    var $_patterns;
    var $_labels;
    var $_regex;
    var $_case;

    /**
    * Constructor. Starts with no patterns.
    * @param boolean $case    True for case sensitive, false
    *                         for insensitive.
    * @access public
    */
    function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
    * Legacy class-named constructor. Kept so that code calling it
    * explicitly keeps working; it only delegates to __construct().
    * @param boolean $case    True for case sensitive, false
    *                         for insensitive.
    * @access public
    */
    function ExpressionLexerParallelRegex($case) {
        $this->__construct($case);
    }

    /**
    * Adds a pattern with an optional label. Discards the cached
    * compound regex so that it is rebuilt on the next match.
    * @param string $pattern      Perl style regex, but ( and )
    *                             lose the usual meaning.
    * @param string $label        Label of regex to be returned
    *                             on a match. Defaults to true.
    * @access public
    */
    function addPattern($pattern, $label = true) {
        $this->_patterns[] = $pattern;
        $this->_labels[] = $label;
        $this->_regex = null;
    }

    /**
    * Attempts to match all patterns at once against a string.
    * @param string $subject      String to match against.
    * @param string $match        Set to the first matched portion of
    *                             the subject, or to "" if nothing matched.
    * @return mixed               False if nothing matched, otherwise
    *                             the label of the pattern that matched
    *                             (true for patterns added without a label).
    * @access public
    */
    function match($subject, &$match) {
        // Always give the by-reference result a defined value.
        $match = "";
        if (count($this->_patterns) == 0) {
            return false;
        }
        if (! preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            return false;
        }
        $match = $matches[0];
        $count = count($matches);
        for ($i = 1; $i < $count; $i++) {
            // Groups that did not take part in the match are reported as
            // empty strings. Compare against "" explicitly: a plain
            // truthiness test would also skip a token that is exactly "0".
            if ($matches[$i] !== "") {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
    * Compounds the patterns into a single regular expression
    * separated with the "or" operator. Caches the regex.
    * Will automatically escape (, ) and / tokens.
    * @return string      Complete regex including delimiters and flags.
    * @access private
    */
    function _getCompoundedRegex() {
        if ($this->_regex === null) {
            // Build the groups in a local list. The stored patterns must
            // stay untouched, otherwise a rebuild triggered by a later
            // addPattern() would wrap and escape them a second time.
            $groups = array();
            foreach ($this->_patterns as $pattern) {
                $groups[] = '(' . str_replace(
                        array('/', '(', ')'),
                        array('\/', '\(', '\)'),
                        $pattern) . ')';
            }
            $this->_regex = "/" . implode("|", $groups) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
    * Accessor for perl regex mode flags to use.
    * @return string       "msS" when case sensitive, otherwise "msSi".
    * @access private
    */
    function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}

/**
* States for a stack machine.
* @package WACT_TEMPLATE
*/
class ExpressionLexerStateStack {
    var $_stack;

    /**
    * Constructor. Starts in named state.
    * @param string $start        Starting state name.
    * @access public
    */
    function __construct($start) {
        $this->_stack = array($start);
    }

    /**
    * Legacy class-named constructor. Kept so that code calling it
    * explicitly keeps working; it only delegates to __construct().
    * @param string $start        Starting state name.
    * @access public
    */
    function ExpressionLexerStateStack($start) {
        $this->__construct($start);
    }

    /**
    * Accessor for current state.
    * @return string       State on top of the stack.
    * @access public
    */
    function getCurrent() {
        return $this->_stack[count($this->_stack) - 1];
    }

    /**
    * Adds a state to the stack and sets it
    * to be the current state.
    * @param string $state        New state.
    * @access public
    */
    function enter($state) {
        array_push($this->_stack, $state);
    }

    /**
    * Leaves the current state and reverts to the previous one.
    * The starting state is never removed.
    * @return boolean    False if we would drop off the bottom
    *                    of the list, true otherwise.
    * @access public
    */
    function leave() {
        if (count($this->_stack) == 1) {
            return false;
        }
        array_pop($this->_stack);
        return true;
    }
}

/**
* Accepts text and breaks it into tokens.
* Some optimisation to make sure the content is only
* scanned by the PHP regex parser once. Lexer modes
* must not start with leading underscores, because the
* underscore prefix is used internally to mark the exit
* and special actions.
* @package WACT_TEMPLATE
*/
class ExpressionLexer {
    var $_regexes;
    var $_parser;
    var $_mode;
    var $_mode_handlers;
    var $_case;

    /**
    * Sets up the lexer in case insensitive matching
    * by default.
    * @param ExpressionParser $parser  Handling strategy by
    *                                  reference.
    * @param string $start             Starting handler.
    * @param boolean $case             True for case sensitive.
    * @access public
    */
    function __construct(&$parser, $start = "accept", $case = false) {
        $this->_case = $case;
        $this->_regexes = array();
        $this->_parser = &$parser;
        // Plain assignment: "= &new" no longer parses on current PHP.
        $this->_mode = new ExpressionLexerStateStack($start);
        $this->_mode_handlers = array();
    }

    /**
    * Legacy class-named constructor. Kept so that code calling it
    * explicitly keeps working; it only delegates to __construct().
    * @param ExpressionParser $parser  Handling strategy by
    *                                  reference.
    * @param string $start             Starting handler.
    * @param boolean $case             True for case sensitive.
    * @access public
    */
    function ExpressionLexer(&$parser, $start = "accept", $case = false) {
        $this->__construct($parser, $start, $case);
    }

    /**
    * Adds a token search pattern for a particular
    * parsing mode. The pattern does not change the
    * current mode.
    * @param string $pattern      Perl style regex, but ( and )
    *                             lose the usual meaning.
    * @param string $mode         Should only apply this
    *                             pattern when dealing with
    *                             this type of input.
    * @access public
    */
    function addPattern($pattern, $mode = "accept") {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
        }
        $this->_regexes[$mode]->addPattern($pattern);
    }

    /**
    * Adds a pattern that will enter a new parsing
    * mode. Useful for entering parenthesis, strings,
    * tags, etc.
    * @param string $pattern      Perl style regex, but ( and )
    *                             lose the usual meaning.
    * @param string $mode         Should only apply this
    *                             pattern when dealing with
    *                             this type of input.
    * @param string $new_mode     Change parsing to this new
    *                             nested mode.
    * @access public
    */
    function addEntryPattern($pattern, $mode, $new_mode) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
        }
        $this->_regexes[$mode]->addPattern($pattern, $new_mode);
    }

    /**
    * Adds a pattern that will exit the current mode
    * and re-enter the previous one.
    * @param string $pattern      Perl style regex, but ( and )
    *                             lose the usual meaning.
    * @param string $mode         Mode to leave.
    * @access public
    */
    function addExitPattern($pattern, $mode) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
        }
        $this->_regexes[$mode]->addPattern($pattern, "__exit");
    }

    /**
    * Adds a pattern that has a special mode. Acts as an entry
    * and exit pattern in one go, effectively calling a special
    * parser handler for this token only.
    * @param string $pattern      Perl style regex, but ( and )
    *                             lose the usual meaning.
    * @param string $mode         Should only apply this
    *                             pattern when dealing with
    *                             this type of input.
    * @param string $special      Use this mode for this one token.
    * @access public
    */
    function addSpecialPattern($pattern, $mode, $special) {
        if (! isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ExpressionLexerParallelRegex($this->_case);
        }
        $this->_regexes[$mode]->addPattern($pattern, "_$special");
    }

    /**
    * Adds a mapping from a mode to another handler.
    * @param string $mode        Mode to be remapped.
    * @param string $handler     New target handler.
    * @access public
    */
    function mapHandler($mode, $handler) {
        $this->_mode_handlers[$mode] = $handler;
    }

    /**
    * Splits the text into tokens. Will fail if no parser is held,
    * if a handler reports an error, if no patterns are registered
    * for the current mode or if a match consumes no content.
    * If successful then each unparsed and parsed token invokes
    * a call to the held parser.
    * @param string $raw        Raw text to tokenise.
    * @return boolean           True on success, else false.
    * @access public
    */
    function parse($raw) {
        if (! isset($this->_parser)) {
            return false;
        }
        $length = strlen($raw);
        while (is_array($parsed = $this->_reduce($raw))) {
            list($unmatched, $matched, $mode) = $parsed;
            if (! $this->_dispatchTokens($unmatched, $matched, $mode)) {
                return false;
            }
            // A match that consumed nothing would loop forever.
            if (strlen($raw) == $length) {
                return false;
            }
            $length = strlen($raw);
        }
        if (! $parsed) {
            return false;
        }
        // Whatever is left over is reported as unmatched text.
        return $this->_invokeParser($raw, EXPRESSION_LEXER_UNMATCHED);
    }

    /**
    * Sends the matched token and any leading unmatched
    * text to the parser changing the lexer to a new
    * mode if one is listed.
    * @param string $unmatched    Unmatched leading portion.
    * @param string $matched      Actual token match.
    * @param string $mode         Mode after match. A boolean
    *                             mode causes no change.
    * @return boolean             False if there was any error
    *                             from the parser.
    * @access private
    */
    function _dispatchTokens($unmatched, $matched, $mode = false) {
        if (! $this->_invokeParser($unmatched, EXPRESSION_LEXER_UNMATCHED)) {
            return false;
        }
        // The exit marker also starts with an underscore, so it has to
        // be tested before the special mode check.
        if ($this->_isModeEnd($mode)) {
            if (! $this->_invokeParser($matched, EXPRESSION_LEXER_EXIT)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if ($this->_isSpecialMode($mode)) {
            // Enter the special mode for this one token only.
            $this->_mode->enter($this->_decodeSpecial($mode));
            if (! $this->_invokeParser($matched, EXPRESSION_LEXER_SPECIAL)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if (is_string($mode)) {
            $this->_mode->enter($mode);
            return $this->_invokeParser($matched, EXPRESSION_LEXER_ENTER);
        }
        return $this->_invokeParser($matched, EXPRESSION_LEXER_MATCHED);
    }

    /**
    * Tests to see if the new mode is actually to leave
    * the current mode and pop an item from the matching
    * mode stack.
    * @param string $mode    Mode to test.
    * @return boolean        True if this is the exit mode.
    * @access private
    */
    function _isModeEnd($mode) {
        return ($mode === "__exit");
    }

    /**
    * Test to see if the mode is one where this mode
    * is entered for this token only and automatically
    * leaves immediately afterwards.
    * @param string $mode    Mode to test.
    * @return boolean        True if the mode is a string starting
    *                        with the underscore marker.
    * @access private
    */
    function _isSpecialMode($mode) {
        // Plain patterns carry the boolean label true; only strings
        // can hold the underscore marker.
        return (is_string($mode) && (strncmp($mode, "_", 1) == 0));
    }

    /**
    * Strips the magic underscore marking single token
    * modes.
    * @param string $mode    Mode to decode.
    * @return string         Underlying mode name.
    * @access private
    */
    function _decodeSpecial($mode) {
        return substr($mode, 1);
    }

    /**
    * Calls the parser method named after the current mode, or
    * the handler mapped to that mode with mapHandler(). Empty
    * content will be ignored and counts as success.
    * @param string $content        Text parsed.
    * @param integer $is_match      One of the EXPRESSION_LEXER_*
    *                               constants describing the event.
    * @return boolean               Result of the handler call, true
    *                               for ignored empty content.
    * @access private
    */
    function _invokeParser($content, $is_match) {
        if (($content === "") || ($content === false)) {
            return true;
        }
        $handler = $this->_mode->getCurrent();
        if (isset($this->_mode_handlers[$handler])) {
            $handler = $this->_mode_handlers[$handler];
        }
        return $this->_parser->$handler($content, $is_match);
    }

    /**
    * Tries to match a chunk of text and if successful
    * removes the recognised chunk and any leading
    * unparsed data. Empty strings will not be matched.
    * @param string $raw  The subject to parse. This is the
    *                     content that will be eaten.
    * @return mixed       Three item list of unparsed
    *                     content followed by the
    *                     recognised token and finally the
    *                     action the parser is to take.
    *                     True if no match, false if there
    *                     are no patterns for the current mode.
    * @access private
    */
    function _reduce(&$raw) {
        $current = $this->_mode->getCurrent();
        if (! isset($this->_regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }
        if ($action = $this->_regexes[$current]->match($raw, $match)) {
            // Everything in front of the token is unparsed content.
            $unparsed_character_count = strpos($raw, $match);
            $unparsed = substr($raw, 0, $unparsed_character_count);
            $raw = substr($raw, $unparsed_character_count + strlen($match));
            return array($unparsed, $match, $action);
        }
        return true;
    }
}
?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Sun Nov 28 19:36:09 2004 | Cross-referenced by PHPXref 0.5 |