2011-10-05 06:22:53 +02:00
< ? php
/*
 * This file is part of Twig.
 *
 * (c) Fabien Potencier
 * (c) Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
/**
 * Lexes a template string.
 *
 * The lexer is a small state machine: tokenize() repeatedly dispatches to
 * lexData(), lexBlock(), lexVar(), lexString() or lexInterpolation()
 * depending on the current state, and every handler consumes part of the
 * template code and appends Twig_Token instances to the token list.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 */
class Twig_Lexer implements Twig_LexerInterface
{
    /** Tokens collected during the current tokenize() run. */
    protected $tokens;
    /** Template code with line endings normalized to "\n". */
    protected $code;
    /** Byte offset of the next unread character in $code. */
    protected $cursor;
    /** Line number attached to the tokens being pushed. */
    protected $lineno;
    /** Byte length of $code. */
    protected $end;
    /** Current lexer state (one of the STATE_* constants). */
    protected $state;
    /** Stack of the states that were active before each pushState(). */
    protected $states;
    /** Stack of array(opening delimiter, line number) pairs still waiting to be closed. */
    protected $brackets;
    protected $env;
    // to be renamed to $name in 2.0 (where it is private)
    protected $filename;
    /** Delimiter configuration, see __construct(). */
    protected $options;
    /** Regular expressions built once from $options, see __construct(). */
    protected $regexes;
    /** Index into $positions of the last start tag handled by lexData(). */
    protected $position;
    /** Result of preg_match_all() for the "lex_tokens_start" regex (with offsets). */
    protected $positions;
    /** Line on which the block/variable currently being lexed was opened. */
    protected $currentVarBlockLine;

    private $source;

    const STATE_DATA = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR = 2;
    const STATE_STRING = 3;
    const STATE_INTERPOLATION = 4;

    const REGEX_NAME = '/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/A';
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
    const REGEX_STRING = '/"([^#"\\\\]*(?:\\\\.[^#"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    const REGEX_DQ_STRING_DELIM = '/"/A';
    const REGEX_DQ_STRING_PART = '/[^#"\\\\]*(?:(?:\\\\.|#(?!\{))[^#"\\\\]*)*/As';
    const PUNCTUATION = '()[]{}?:.,|';

    /**
     * Builds the lexer regexes from the given delimiter options.
     *
     * @param Twig_Environment $env     Environment providing the unary/binary operators
     * @param array            $options Overrides for the default delimiters:
     *                                  tag_comment, tag_block, tag_variable
     *                                  (array(start, end) pairs), whitespace_trim
     *                                  and interpolation (array(start, end))
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $this->options = array_merge(array(
            'tag_comment' => array('{#', '#}'),
            'tag_block' => array('{%', '%}'),
            'tag_variable' => array('{{', '}}'),
            'whitespace_trim' => '-',
            'interpolation' => array('#{', '}'),
        ), $options);

        // Quote every delimiter once. preg_quote() escapes character by
        // character, so concatenating quoted pieces is equivalent to quoting
        // the concatenated delimiters.
        $trim = preg_quote($this->options['whitespace_trim'], '/');
        $commentStart = preg_quote($this->options['tag_comment'][0], '/');
        $commentEnd = preg_quote($this->options['tag_comment'][1], '/');
        $blockStart = preg_quote($this->options['tag_block'][0], '/');
        $blockEnd = preg_quote($this->options['tag_block'][1], '/');
        $varStart = preg_quote($this->options['tag_variable'][0], '/');
        $varEnd = preg_quote($this->options['tag_variable'][1], '/');
        $interpolationStart = preg_quote($this->options['interpolation'][0], '/');
        $interpolationEnd = preg_quote($this->options['interpolation'][1], '/');

        $this->regexes = array(
            'lex_var' => '/\s*'.$trim.$varEnd.'\s*|\s*'.$varEnd.'/A',
            'lex_block' => '/\s*(?:'.$trim.$blockEnd.'\s*|\s*'.$blockEnd.')\n?/A',
            // "%s" is replaced by the tag name (raw/verbatim) in lexRawData()
            'lex_raw_data' => '/('.$blockStart.$trim.'|'.$blockStart.')\s*(?:end%s)\s*(?:'.$trim.$blockEnd.'\s*|\s*'.$blockEnd.')/s',
            'operator' => $this->getOperatorRegex(),
            'lex_comment' => '/(?:'.$trim.$commentEnd.'\s*|'.$commentEnd.')\n?/s',
            'lex_block_raw' => '/\s*(raw|verbatim)\s*(?:'.$trim.$blockEnd.'\s*|\s*'.$blockEnd.')/As',
            'lex_block_line' => '/\s*line\s+(\d+)\s*'.$blockEnd.'/As',
            'lex_tokens_start' => '/('.$varStart.'|'.$blockStart.'|'.$commentStart.')('.$trim.')?/s',
            'interpolation_start' => '/'.$interpolationStart.'\s*/A',
            'interpolation_end' => '/\s*'.$interpolationEnd.'/A',
        );
    }

    /**
     * Tokenizes a template.
     *
     * @param Twig_Source|string $code A Twig_Source instance; passing a string is deprecated
     * @param string|null        $name Template name, only used when $code is a string
     *
     * @return Twig_TokenStream
     *
     * @throws Twig_Error_Syntax When the template cannot be lexed
     */
    public function tokenize($code, $name = null)
    {
        if (!$code instanceof Twig_Source) {
            @trigger_error(sprintf('Passing a string as the $code argument of %s() is deprecated since version 1.27 and will be removed in 2.0. Pass a Twig_Source instance instead.', __METHOD__), E_USER_DEPRECATED);
            $this->source = new Twig_Source($code, $name);
        } else {
            $this->source = $code;
        }

        $stringFunctionsOverloaded = ((int) ini_get('mbstring.func_overload')) & 2;
        if ($stringFunctionsOverloaded) {
            @trigger_error('Support for having "mbstring.func_overload" different from 0 is deprecated version 1.29 and will be removed in 2.0.', E_USER_DEPRECATED);
        }

        // The lexer works on byte offsets (strlen/substr). When the string
        // functions are overloaded by mbstring, temporarily switch the
        // internal encoding to ASCII.
        $mbEncoding = null;
        if (function_exists('mb_internal_encoding') && $stringFunctionsOverloaded) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }

        try {
            $this->code = str_replace(array("\r\n", "\r"), "\n", $this->source->getCode());
            $this->filename = $this->source->getName();
            $this->cursor = 0;
            $this->lineno = 1;
            $this->end = strlen($this->code);
            $this->tokens = array();
            $this->state = self::STATE_DATA;
            $this->states = array();
            $this->brackets = array();
            $this->position = -1;

            // find all token starts in one go
            preg_match_all($this->regexes['lex_tokens_start'], $this->code, $matches, PREG_OFFSET_CAPTURE);
            $this->positions = $matches;

            while ($this->cursor < $this->end) {
                // dispatch to the lexing functions depending
                // on the current state
                switch ($this->state) {
                    case self::STATE_DATA:
                        $this->lexData();
                        break;

                    case self::STATE_BLOCK:
                        $this->lexBlock();
                        break;

                    case self::STATE_VAR:
                        $this->lexVar();
                        break;

                    case self::STATE_STRING:
                        $this->lexString();
                        break;

                    case self::STATE_INTERPOLATION:
                        $this->lexInterpolation();
                        break;
                }
            }

            $this->pushToken(Twig_Token::EOF_TYPE);

            if (!empty($this->brackets)) {
                list($expect, $lineno) = array_pop($this->brackets);
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
            }
        } catch (Exception $e) {
            // Do not leak the temporary ASCII encoding to the caller when
            // lexing fails (catch + rethrow instead of "finally" to stay
            // compatible with old PHP versions).
            if (null !== $mbEncoding) {
                mb_internal_encoding($mbEncoding);
            }

            throw $e;
        }

        if (null !== $mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return new Twig_TokenStream($this->tokens, $this->source);
    }

    /**
     * Lexes raw template text up to the next comment/block/variable start tag.
     *
     * Pushes the text as a TEXT token, consumes the start tag and switches
     * to the matching state (or handles comments, raw/verbatim and line
     * blocks directly).
     */
    protected function lexData()
    {
        $lastPosition = count($this->positions[0]) - 1;

        // if no matches are left we return the rest of the template as simple text token
        if ($this->position === $lastPosition) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // Find the first token after the current cursor
        $position = $this->positions[0][++$this->position];
        while ($position[1] < $this->cursor) {
            if ($this->position === $lastPosition) {
                // every remaining start tag lies before the cursor; the next
                // lexData() call emits the rest of the template as text
                return;
            }
            $position = $this->positions[0][++$this->position];
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $position[1] - $this->cursor);

        // Right-trim only when the start tag really carries the trim marker.
        // Comparing the captured value (instead of relying on isset() alone)
        // keeps the decision independent of how an unmatched optional group
        // is represented in the preg_match_all() result.
        if (isset($this->positions[2][$this->position][0])
            && $this->options['whitespace_trim'] === $this->positions[2][$this->position][0]
        ) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        $this->moveCursor($textContent.$position[0]);

        switch ($this->positions[1][$this->position][0]) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                // raw data?
                if (preg_match($this->regexes['lex_block_raw'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData($match[1]);
                // {% line \d+ %}
                } elseif (preg_match($this->regexes['lex_block_line'], $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->pushState(self::STATE_BLOCK);
                    $this->currentVarBlockLine = $this->lineno;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->pushState(self::STATE_VAR);
                $this->currentVarBlockLine = $this->lineno;
                break;
        }
    }

    /**
     * Lexes the inside of a block tag: either its end delimiter (only when
     * no bracket is open) or the next expression token.
     */
    protected function lexBlock()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_block'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes the inside of a variable tag: either its end delimiter (only
     * when no bracket is open) or the next expression token.
     */
    protected function lexVar()
    {
        if (empty($this->brackets) && preg_match($this->regexes['lex_var'], $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor (after skipping leading
     * whitespace): operator, name, number, punctuation, string, or the
     * opening quote of a double-quoted string that needs interpolation.
     *
     * @throws Twig_Error_Syntax On end of input, unbalanced brackets or an unlexable character
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', self::STATE_BLOCK === $this->state ? 'block' : 'variable'), $this->currentVarBlockLine, $this->source);
            }
        }

        if (preg_match($this->regexes['operator'], $this->code, $match, 0, $this->cursor)) {
            // operators; inner whitespace runs are collapsed to one space
            $this->pushToken(Twig_Token::OPERATOR_TYPE, preg_replace('/\s+/', ' ', $match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            // names
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            // numbers
            $number = (float) $match[0];  // floats
            if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
                $number = (int) $match[0]; // integers lower than the maximum
            }
            $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
            $this->moveCursor($match[0]);
        } elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            // punctuation
            $char = $this->code[$this->cursor];

            if (false !== strpos('([{', $char)) {
                // opening bracket
                $this->brackets[] = array($char, $this->lineno);
            } elseif (false !== strpos(')]}', $char)) {
                // closing bracket
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s".', $char), $this->lineno, $this->source);
                }

                list($expect, $lineno) = array_pop($this->brackets);
                if ($char != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s".', $expect), $lineno, $this->source);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $char);
            ++$this->cursor;
        } elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            // strings
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            // opening double quoted string
            $this->brackets[] = array('"', $this->lineno);
            $this->pushState(self::STATE_STRING);
            $this->moveCursor($match[0]);
        } else {
            // unlexable
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Consumes everything up to the matching "end<tag>" block and pushes it
     * as a single TEXT token.
     *
     * @param string $tag The tag that opened the raw section ("raw" or "verbatim")
     *
     * @throws Twig_Error_Syntax When the closing block is missing
     */
    protected function lexRawData($tag)
    {
        if ('raw' === $tag) {
            @trigger_error(sprintf('Twig Tag "raw" is deprecated since version 1.21. Use "verbatim" instead in %s at line %d.', $this->filename, $this->lineno), E_USER_DEPRECATED);
        }

        if (!preg_match(str_replace('%s', $tag, $this->regexes['lex_raw_data']), $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s" block.', $tag), $this->lineno, $this->source);
        }

        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->moveCursor($text.$match[0][0]);

        // the closing block starts with the trim marker: drop trailing whitespace
        if (false !== strpos($match[1][0], $this->options['whitespace_trim'])) {
            $text = rtrim($text);
        }

        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
    }

    /**
     * Skips a comment: moves the cursor past the comment end delimiter
     * without pushing any token.
     *
     * @throws Twig_Error_Syntax When the comment is not closed
     */
    protected function lexComment()
    {
        if (!preg_match($this->regexes['lex_comment'], $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unclosed comment.', $this->lineno, $this->source);
        }

        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Lexes the inside of a double-quoted string: an interpolation start, a
     * literal string part, or the closing quote.
     *
     * @throws Twig_Error_Syntax On an unlexable character
     */
    protected function lexString()
    {
        if (preg_match($this->regexes['interpolation_start'], $this->code, $match, 0, $this->cursor)) {
            $this->brackets[] = array($this->options['interpolation'][0], $this->lineno);
            $this->pushToken(Twig_Token::INTERPOLATION_START_TYPE);
            $this->moveCursor($match[0]);
            $this->pushState(self::STATE_INTERPOLATION);
        } elseif (preg_match(self::REGEX_DQ_STRING_PART, $this->code, $match, 0, $this->cursor) && strlen($match[0]) > 0) {
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes($match[0]));
            $this->moveCursor($match[0]);
        } elseif (preg_match(self::REGEX_DQ_STRING_DELIM, $this->code, $match, 0, $this->cursor)) {
            // The anchored delimiter regex just matched at the cursor, so the
            // current character is the closing quote: drop the bracket entry
            // recorded when the string was opened and leave the string state.
            array_pop($this->brackets);
            $this->popState();
            ++$this->cursor;
        } else {
            // unlexable
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s".', $this->code[$this->cursor]), $this->lineno, $this->source);
        }
    }

    /**
     * Lexes the inside of a string interpolation: either its end delimiter
     * (only when the innermost open bracket is the interpolation itself) or
     * the next expression token.
     */
    protected function lexInterpolation()
    {
        $bracket = end($this->brackets);
        if ($this->options['interpolation'][0] === $bracket[0] && preg_match($this->regexes['interpolation_end'], $this->code, $match, 0, $this->cursor)) {
            array_pop($this->brackets);
            $this->pushToken(Twig_Token::INTERPOLATION_END_TYPE);
            $this->moveCursor($match[0]);
            $this->popState();
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Appends a token carrying the current line number.
     *
     * @param int    $type  One of the Twig_Token::*_TYPE constants
     * @param string $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor by the byte length of $text and the line counter
     * by the number of newlines it contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $this->cursor += strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds the anchored regex matching "=" and every unary/binary operator
     * of the environment, longest operators first.
     *
     * @return string
     */
    protected function getOperatorRegex()
    {
        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // operator => length, sorted by decreasing length so that longer
        // operators come first in the alternation
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = array();
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                $r = preg_quote($operator, '/').'(?=[\s()])';
            } else {
                $r = preg_quote($operator, '/');
            }

            // an operator with a space can be any amount of whitespaces
            $r = preg_replace('/\s+/', '\s+', $r);

            $regex[] = $r;
        }

        return '/'.implode('|', $regex).'/A';
    }

    /**
     * Enters a new state, remembering the current one.
     *
     * @param int $state One of the STATE_* constants
     */
    protected function pushState($state)
    {
        $this->states[] = $this->state;
        $this->state = $state;
    }

    /**
     * Returns to the state that was active before the last pushState().
     *
     * @throws Exception When there is no previous state
     */
    protected function popState()
    {
        if (0 === count($this->states)) {
            throw new Exception('Cannot pop state without a previous state.');
        }

        $this->state = array_pop($this->states);
    }
}
2018-05-10 12:24:53 +02:00
// Make the class also available under the namespaced name "Twig\Lexer".
// The third argument (false) tells class_alias() not to trigger autoloading.
class_alias ( 'Twig_Lexer' , 'Twig\Lexer' , false );