A bare bones front-end for knockout designed for maximum compatibility with "obsolete" browsers
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

150 lines
5.9KB

  1. <?php
  2. namespace App\Vendor\BBCode\Tokenizer;
  3. use App\Vendor\BBCode\BBCode;
  4. use App\Vendor\BBCode\Tag\AbstractTagType;
  5. /**
  6. * "Tokenization is the process of demarcating and possibly classifying
  7. * sections of a string of input characters" (Source: Wikipedia)
  8. * The tokenizer operates on the text and tries to split it into parts.
  9. * The tokenizer is not very smart, it does not really care for grammar.
  10. */
  11. class Tokenizer
  12. {
  13. /**
  14. * Tokenize the text. Returns an array with the tokens.
  15. * Note: There can be more than one subsequent plain text tokens
  16. *
  17. * @param string $text Render the passed BBCode string
  18. * @param bool $escape Escape HTML entities? (Only "<" and ">"!)
  19. * @param bool $keepLines Keep line breaks by replacing them with <br>?
  20. * @return Token[]
  21. */
  22. public function tokenize($text, $escape = true, $keepLines = true)
  23. {
  24. $tokens = [];
  25. $length = mb_strlen($text);
  26. $value = '';
  27. $insideTag = false; // Means: The current position is between "[" and "]" (=a tag definition)
  28. $insideName = false; // In a tag "[code]", "code" is the name of the tag
  29. $insideString = false; // Properties of tags can be written as string with " at the start & end
  30. $noParse = false; // If true, do not parse BBCode inside this tag
  31. $tagName = ''; // Name of the current tag
  32. $tagProperty = ''; // Property value of the current tag
  33. $tagOpening = null; // True/false + null = undefined
  34. $tagStartPos = 0;
  35. // Loop over each character of the text
  36. for ($pos = 0; $pos < $length; $pos++) {
  37. $char = mb_substr($text, $pos, 1);
  38. if ($keepLines) {
  39. // Create line break token when \n
  40. if ($char === "\n") {
  41. $tokens[] = new Token($char, Token::TYPE_LINEBREAK, $pos);
  42. }
  43. // Ignore \r
  44. if ($char === "\r") {
  45. continue;
  46. }
  47. }
  48. if (! $escape or ($char !== '<' and $char !== '>')) {
  49. if ($insideTag) {
  50. if ($char === '"') {
  51. if ($insideString) {
  52. $insideString = false;
  53. } else {
  54. $insideString = true;
  55. }
  56. } else {
  57. // "]" closes a tag (if it is not used in a string)
  58. if ($char == ']' and ! $insideString) {
  59. if (! $noParse or (! $tagOpening and $this->checkNoParse($value))) {
  60. $tokenType = $tagOpening ? Token::TYPE_TAG_OPENING : Token::TYPE_TAG_CLOSING;
  61. $tokens[] = new Token($tagName, $tokenType, $tagStartPos, $tagProperty);
  62. } else {
  63. $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
  64. }
  65. $noParse = $this->checkNoParse($value);
  66. $tagName = '';
  67. $value = '';
  68. $insideTag = false;
  69. $insideName = false;
  70. continue;
  71. }
  72. if ($insideName and ! $insideString) {
  73. // This makes the current tag a closing tag
  74. if ($char === '/') {
  75. $tagOpening = false;
  76. } else {
  77. // This means a property starts
  78. if ($char === '=') {
  79. $insideName = false;
  80. } elseif ($char === '[') { // Invalid tag - ignore it and start again
  81. $value = '';
  82. $tagName = '';
  83. $tagOpening = true;
  84. }
  85. else {
  86. $value .= mb_strtolower($char);
  87. $tagName .= mb_strtolower($char);
  88. }
  89. }
  90. } else { // If we are not inside the name we are inside a property
  91. $tagProperty .= $char;
  92. }
  93. }
  94. } else {
  95. if ($char === '[') {
  96. // Since a tag starts, plain text may end and we have to create a token for it
  97. if ($value !== '') {
  98. $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
  99. $value = '';
  100. }
  101. $insideTag = true;
  102. $insideName = true;
  103. $tagOpening = true;
  104. $tagStartPos = $pos;
  105. $tagName = '';
  106. } else {
  107. // This is plain text
  108. $value .= $char;
  109. }
  110. }
  111. } else {
  112. // Escape HTML chars "<" and ">"
  113. $value .= htmlspecialchars($char);
  114. }
  115. }
  116. // If the text ends with plain text we have to create the final plain text token now
  117. if ($value !== '') {
  118. $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
  119. }
  120. return $tokens;
  121. }
  122. /**
  123. * Check if a tag is a tag that forbids parsing of its inner content
  124. *
  125. * @param string $tagName
  126. * @return bool
  127. */
  128. protected function checkNoParse($tagName)
  129. {
  130. // We do not want to throw any exceptions so we just return false
  131. return false;
  132. }
  133. }