Knockout
/
Knockout-Lite


			
				
					
						
						
							
							<?php

namespace App\Vendor\BBCode\Tokenizer;

use App\Vendor\BBCode\BBCode;
use App\Vendor\BBCode\Tag\AbstractTagType;

/**
 * "Tokenization is the process of demarcating and possibly classifying
 * sections of a string of input characters" (Source: Wikipedia)
 * The tokenizer operates on the text and tries to split it into parts.
 * The tokenizer is not very smart, it does not really care for grammar.
 */
class Tokenizer
{

    /**
     * Tokenize the text. Returns an array with the tokens.
     *
     * The text is scanned character by character. Everything between "[" and "]"
     * is treated as a tag definition and becomes an opening or closing tag token
     * (tag names are lower-cased); everything else is collected into plain text
     * tokens. Each tag token carries only its own property value (the part after
     * "="); double quotes around a property are dropped and protect "]" inside it.
     *
     * Note: There can be more than one subsequent plain text tokens
     *
     * @param  string            $text      The BBCode string to tokenize
     * @param  bool              $escape    Escape HTML entities? (Only "<" and ">"!)
     * @param  bool              $keepLines If true, "\n" creates a line break token and "\r" is dropped
     * @return Token[]
     */
    public function tokenize($text, $escape = true, $keepLines = true)
    {
        $tokens         = [];
        $length         = mb_strlen($text);
        $value          = '';    // Plain text collected so far, or the raw name of the current tag
        $insideTag      = false; // Means: The current position is between "[" and "]" (=a tag definition)
        $insideName     = false; // In a tag "[code]", "code" is the name of the tag
        $insideString   = false; // Properties of tags can be written as string with " at the start & end
        $noParse        = false; // If true, do not parse BBCode inside this tag
        $tagName        = '';    // Name of the current tag
        $tagProperty    = '';    // Property value of the current tag
        $tagOpening     = null;  // True/false + null = undefined
        $tagStartPos    = 0;

        // Loop over each character of the text
        for ($pos = 0; $pos < $length; $pos++) {
            $char = mb_substr($text, $pos, 1);

            if ($keepLines) {
                // Create line break token when \n
                // NOTE(review): the "\n" is not skipped afterwards, so it is also appended to the
                // current text / tag below, and plain text collected before it is emitted after
                // this token - confirm that the token consumer expects this.
                if ($char === "\n") {
                    $tokens[] = new Token($char, Token::TYPE_LINEBREAK, $pos);
                }
                // Ignore \r
                if ($char === "\r") {
                    continue;
                }
            }

            // Escape HTML chars "<" and ">"
            if ($escape && ($char === '<' || $char === '>')) {
                $value .= htmlspecialchars($char);
                continue;
            }

            if (! $insideTag) {
                if ($char === '[') {
                    // Since a tag starts, plain text may end and we have to create a token for it
                    // NOTE(review): the position passed here is the start of the previous tag,
                    // not the start of the text itself - verify this is intended.
                    if ($value !== '') {
                        $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
                        $value = '';
                    }

                    $insideTag = true;
                    $insideName = true;
                    $tagOpening = true;
                    $tagStartPos = $pos;
                    $tagName = '';
                    // Every tag starts with an empty property, otherwise the property of an
                    // earlier tag would leak into this one
                    $tagProperty = '';
                } else {
                    // This is plain text
                    $value .= $char;
                }

                continue;
            }

            // From here on we are inside a tag definition

            // A double quote starts or ends a string and is not part of the name or property
            if ($char === '"') {
                $insideString = ! $insideString;
                continue;
            }

            // "]" closes a tag (if it is not used in a string)
            if ($char === ']' && ! $insideString) {
                if (! $noParse || (! $tagOpening && $this->checkNoParse($value))) {
                    $tokenType = $tagOpening ? Token::TYPE_TAG_OPENING : Token::TYPE_TAG_CLOSING;
                    $tokens[] = new Token($tagName, $tokenType, $tagStartPos, $tagProperty);
                } else {
                    $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
                }

                $noParse = $this->checkNoParse($value);

                $tagName = '';
                $value = '';
                $insideTag = false;
                $insideName = false;
                continue;
            }

            // If we are not inside the name (or inside a string) we are inside a property
            if (! $insideName || $insideString) {
                $tagProperty .= $char;
                continue;
            }

            if ($char === '/') {
                // This makes the current tag a closing tag
                $tagOpening = false;
            } elseif ($char === '=') {
                // This means a property starts
                $insideName = false;
            } elseif ($char === '[') {
                // Invalid tag - ignore it and start again
                $value = '';
                $tagName = '';
                $tagProperty = '';
                $tagOpening = true;
            } else {
                $lowerChar = mb_strtolower($char);
                $value .= $lowerChar;
                $tagName .= $lowerChar;
            }
        }

        // If the text ends with plain text we have to create the final plain text token now
        if ($value !== '') {
            $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
        }

        return $tokens;
    }

    /**
     * Check if a tag is a tag that forbids parsing of its inner content.
     *
     * This implementation always returns false, so no tag disables parsing
     * unless a subclass overrides this method.
     *
     * @param string $tagName
     * @return bool
     */
    protected function checkNoParse($tagName)
    {
        // We do not want to throw any exceptions so we just return false
        return false;
    }

}