Parcourir la source

Bring in a better tokenizer from the dev branch

staging
Parent
révision
ab0469bb08
5 fichiers modifiés avec 484 ajouts et 88 suppressions
  1. +88
    -2
      app/Helper/BBCode.php
  2. +46
    -86
      app/Vendor/BBCode/BBCode.php
  3. +32
    -0
      app/Vendor/BBCode/Tag/AbstractTag.php
  4. +169
    -0
      app/Vendor/BBCode/Tokenizer/Token.php
  5. +149
    -0
      app/Vendor/BBCode/Tokenizer/Tokenizer.php

+ 88
- 2
app/Helper/BBCode.php Voir le fichier

@@ -14,19 +14,105 @@ class BBCode {
{
$this->parser = new Parser();
$this->content = $content;
$this->defineTags();
//$this->defineTags();
}

/**
 * Registers the render closures for all supported BBCode tags on the parser.
 *
 * Tags that map directly onto a fixed pair of HTML snippets are driven by a
 * lookup table. The "url", "quote" and "q" tags need their own logic and are
 * registered separately.
 *
 * @return void
 */
private function defineTags()
{
    // Tag name => [HTML for the opening tag, HTML for the closing tag]
    $simpleTags = [
        'b'          => ['<strong>', '</strong>'],             // bold
        'i'          => ['<em>', '</em>'],                     // italics
        'u'          => ['<u>', '</u>'],                       // underline
        's'          => ['<s>', '</s>'],                       // strikethrough
        'spoiler'    => ['<span class="spoiler">', '</span>'], // spoiler
        'h1'         => ['<h1>', '</h1>'],                     // heading 1
        'h2'         => ['<h2>', '</h2>'],                     // heading 2
        'blockquote' => ['<div class="blockquote">', '</div>'],
        'code'       => ['<pre>', '</pre>'],
        'ul'         => ['<ul>', '</ul>'],                     // unordered list
        'ol'         => ['<ol>', '</ol>'],                     // ordered list
        'li'         => ['<li>', '</li>'],                     // list item
    ];

    foreach ($simpleTags as $name => $markup) {
        $this->parser->addTag($name, function ($tag, &$html, $openingTag) use ($markup) {
            return $openingTag ? $markup[0] : $markup[1];
        });
    }

    // hyperlink
    $this->parser->addTag('url', function ($tag, &$html, $openingTag) {
        if (! $openingTag) {
            return '</a>';
        }

        // The link target comes from the post text, so it has to be escaped before it is
        // written into the attribute - otherwise a quote character could break out of it.
        // NOTE(review): the URL scheme (e.g. "javascript:") is not validated here.
        $target = htmlspecialchars((string) ($tag->property ?? $html), ENT_QUOTES, 'UTF-8');

        return sprintf('<a href="%s">', $target);
    });

    // quote and its shorthand share one renderer
    foreach (['quote', 'q'] as $name) {
        $this->parser->addTag($name, function ($tag, &$html, $openingTag) {
            return $this->renderQuote($tag, $html, $openingTag);
        });
    }
}

/**
 * Renders the opening or the closing markup of a quote box.
 *
 * @param mixed  $tag        The tag that is rendered (not used yet)
 * @param string $html       Passed by reference by the caller; neither read nor changed here
 * @param bool   $openingTag True renders the opening markup, false the closing markup
 * @return string
 */
public function renderQuote($tag, &$html, $openingTag)
{
    if (! $openingTag) {
        return '</div></div>';
    }

    // TODO: Link to the quoted post instead of using the placeholder below, e.g.
    // '<div class="quote"><a class="user" href="/thread/%d/%d#post-%d">%s</a><div class="postContent">'
    // filled with $postData->threadId, $postData->threadPage, $postData->postId, $postData->username
    return '<div class="quote"><a class="user" href="#">User</a><div class="postContent">';
}

/**
 * Passes the stored content to the parser and returns the result.
 *
 * NOTE(review): as written, the first return statement makes the renderPlain()
 * call below it unreachable - confirm which of the two calls is the intended one.
 *
 * @return mixed The value returned by the parser
 */
public function render()
{
return $this->parser->render($this->content);
//$test = '[quote mentionsUser="1427" postId="659032" threadPage="1" threadId="19764" username="Dr. Magnusson"][/quote]';
return $this->parser->renderPlain($this->content);
}

}

+ 46
- 86
app/Vendor/BBCode/BBCode.php Voir le fichier

@@ -2,6 +2,10 @@

namespace App\Vendor\BBCode;

use App\Vendor\BBCode\Tags\BoldTag;
use App\Vendor\BBCode\Tags\NoParseTag;
use App\Vendor\BBCode\Tokenizer\Token;
use App\Vendor\BBCode\Tokenizer\Tokenizer;
use Closure;

/*
@@ -14,6 +18,8 @@ use Closure;
*/
class Bbcode {

protected $tagTypes = [];

/**
* The text with BBCodes
*
@@ -21,21 +27,6 @@ class Bbcode {
*/
protected $text = null;

/**
* Array with custom tag Closures
*
* @var Closure[]
*/
protected $customTagClosures = array();

/**
* Array of (name of) tags that are ignored
*
* @var string[]
*/
protected $ignoredTags = array();


/**
* BBCode constructor.
*
@@ -100,6 +91,45 @@ class Bbcode {
$text = $this->text;
}

$tokenizer = new Tokenizer($this->tagTypes);
$tokens = $tokenizer->tokenize($text, $escape, $keepLines);

$html = '';
$level = 0;
foreach ($tokens as $index => $token) {
switch ($token->getType()) {
case Token::TYPE_LINEBREAK:
$html .= '<br/>';
break;
case Token::TYPE_PLAIN_TEXT:
$html .= $token->getValue();
break;
case Token::TYPE_TAG_OPENING:
case Token::TYPE_TAG_CLOSING:
$tagOpening = $token->getType() === Token::TYPE_TAG_OPENING;
$tagName = $token->getValue();

// If the tag is not known, just do not render it. We do not want to throw any exceptions.
if (isset($this->tagTypes[$tagName])) {
$tagType = $this->tagTypes[$tagName];

/** @var AbstractTagType $tag */
$tag = new $tagType;
$tag->render($html, $tagOpening);
}


if ($tagOpening) {
$level++;
} else {
$level--;
}
}
}

return $html;


$html = '';
$len = mb_strlen($text);
$inTag = false; // True if current position is inside a tag
@@ -249,6 +279,7 @@ class Bbcode {
// Custom tags:
foreach ($this->customTagClosures as $name => $closure) {
if ($tag->name === $name) {
exit($tag->name);
$code .= $closure($tag, $html, $openingTag);
}
}
@@ -288,77 +319,6 @@ class Bbcode {
}
}

/**
 * Adds a custom tag (with name and a Closure).
 * A Closure that is already registered under the same name is replaced.
 *
 * Example:
 *
 *     $bbcode->addTag('example', function($tag, &$html, $openingTag) {
 *         if ($tag->opening) {
 *             return '<span class="example">';
 *         } else {
 *             return '</span>';
 *         }
 *     });
 *
 * @param string $name The name of the tag (used as the key of the Closure)
 * @param Closure $closure The Closure that renders the tag
 * @return void
 */
public function addTag($name, Closure $closure)
{
$this->customTagClosures[$name] = $closure;
}

/**
 * Removes the custom tag with the given name.
 * Nothing happens if no custom tag is registered under that name.
 *
 * @param string $name The name the tag was registered with
 * @return void
 */
public function forgetTag($name)
{
unset($this->customTagClosures[$name]);
}

/**
 * Adds a tag to the array of ignored tags.
 * A name that is already in the array is not added a second time.
 *
 * @param string $name The name of the tag
 * @return void
 */
public function ignoreTag($name)
{
    // Compare strictly: with a loose comparison numeric-looking names such as
    // "1" and "01" are considered equal and the second one would never be added
    if (! in_array($name, $this->ignoredTags, true)) {
        $this->ignoredTags[] = $name;
    }
}

/**
 * Removes a tag from the array of ignored tags.
 * Nothing happens if the tag is not in the array.
 *
 * @param string $name The name of the tag
 * @return void
 */
public function permitTag($name)
{
    // Search strictly so that a numeric-looking name (e.g. "1") cannot
    // match - and remove - a different entry (e.g. "01")
    $key = array_search($name, $this->ignoredTags, true);

    // Key 0 is a valid result, so only false means "not found"
    if ($key !== false) {
        unset($this->ignoredTags[$key]);
    }
}

/**
 * Returns the array with the names of the tags that are ignored.
 * The array is returned as stored, without reindexing its keys.
 *
 * @return string[]
 */
public function getIgnoredTags()
{
return $this->ignoredTags;
}

/**
* Returns true if $haystack ends with $needle
*


+ 32
- 0
app/Vendor/BBCode/Tag/AbstractTag.php Voir le fichier

@@ -0,0 +1,32 @@
<?php

namespace App\Vendor\BBCode\Tags;

/**
 * Abstract base class for all BBCode tag classes.
 * A sub class describes one tag type via the constants below and
 * implements render() to produce the markup of the tag.
 */
abstract class AbstractTag
{
/**
 * The name of the tag type (lower cased).
 * The sub class has to override this constant, the default is an empty string.
 */
const NAME = '';

/**
 * If true, inner tags will be treated as plain text.
 * The default is false; a sub class overrides this constant to change it.
 */
const NO_PARSE = false;

/**
 * This method renders a tag of this type.
 * It is declared to return nothing: $html is passed by reference, so the
 * markup is presumably meant to be appended to it - TODO confirm with the caller.
 *
 * @param string $html The generated HTML code so far - passed by reference
 * @param bool $opening Is the tag opening (true) or closing (false)?
 * @return void
 */
abstract public function render(&$html, $opening);

}

+ 169
- 0
app/Vendor/BBCode/Tokenizer/Token.php Voir le fichier

@@ -0,0 +1,169 @@
<?php

namespace App\Vendor\BBCode\Tokenizer;

/**
 * A token is one part of a tokenized text: a line break, a piece of
 * plain text or an opening / closing tag. Tokens are created by the tokenizer.
 */
class Token
{

    /**
     * Defines the type of a token.
     * Example token value of a token with this type:
     * '\n'
     *
     * @const int
     */
    const TYPE_LINEBREAK = 0;

    /**
     * Defines the type of a token.
     * Example token value of a token with this type:
     * 'Hello world'
     *
     * @const int
     */
    const TYPE_PLAIN_TEXT = 1;

    /**
     * Defines the type of a token.
     * Example token value of a token with this type:
     * '[b]'
     *
     * @const int
     */
    const TYPE_TAG_OPENING = 2;

    /**
     * Defines the type of a token.
     * Example token value of a token with this type:
     * '[/b]'
     *
     * @const int
     */
    const TYPE_TAG_CLOSING = 3;

    /**
     * The raw value of the token. Numbers are stored as string.
     *
     * @var string
     */
    protected $value = null;

    /**
     * The type of the token. One of these constants:
     * self::TYPE_LINEBREAK|self::TYPE_PLAIN_TEXT|self::TYPE_TAG_OPENING|self::TYPE_TAG_CLOSING
     *
     * @var int
     */
    protected $type;

    /**
     * Position of the token in the input stream.
     * It is stored as a debugging information.
     *
     * @var int
     */
    protected $position;

    /**
     * The property value of the token (empty string = none)
     *
     * @var string
     */
    protected $property;

    /**
     * Token constructor. The position must be >= 0.
     *
     * @param string $value    The value of the token
     * @param int    $type     The type of the token - one of these: self::TYPE_<NAME>
     * @param int    $position The position of the token in the original text
     * @param string $property Optional: The property value of the token
     *
     * @throws \InvalidArgumentException If one of the arguments has the wrong type or value
     */
    public function __construct($value, $type, $position, $property = '')
    {
        if (! is_string($value)) {
            throw new \InvalidArgumentException(
                'Error: Argument "value" has to be of type string but is of type "'.gettype($value).'"'
            );
        }
        $this->value = $value;

        // Strict check: a loose comparison would also accept values such as '1', true or null.
        // A type that is not exactly one of the int constants can never match a "===" comparison
        // against them later on.
        if (! in_array($type, $this->getAllTypes(), true)) {
            throw new \InvalidArgumentException(
                'Error: Argument "type" does not have the value of a known token type'
            );
        }
        $this->type = $type;

        if (! is_int($position)) {
            throw new \InvalidArgumentException('Error: Argument "position" has to be of type int');
        }
        if ($position < 0) {
            throw new \InvalidArgumentException('Error: Value of parameter "position" has to be >= zero');
        }
        $this->position = $position;

        if (! is_string($property)) {
            throw new \InvalidArgumentException(
                'Error: Argument "property" has to be of type string but is of type "'.gettype($property).'"'
            );
        }
        $this->property = $property;
    }

    /**
     * Returns an array that contains the values of all
     * possible types of token type constants.
     *
     * @see self::TYPE_<NAME>
     *
     * @return int[]
     */
    public function getAllTypes()
    {
        return [self::TYPE_LINEBREAK, self::TYPE_PLAIN_TEXT, self::TYPE_TAG_OPENING, self::TYPE_TAG_CLOSING];
    }

    /**
     * Getter for the value
     *
     * @return string
     */
    public function getValue()
    {
        return $this->value;
    }

    /**
     * Getter for the type
     *
     * @return int
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * Getter for the position
     *
     * @return int
     */
    public function getPosition()
    {
        return $this->position;
    }

    /**
     * Getter for the property value (empty string = none)
     *
     * @return string
     */
    public function getProperty()
    {
        return $this->property;
    }

    /**
     * Returns the value of the token
     *
     * @return string
     */
    public function __toString()
    {
        return $this->value;
    }

}

+ 149
- 0
app/Vendor/BBCode/Tokenizer/Tokenizer.php Voir le fichier

@@ -0,0 +1,149 @@
<?php

namespace App\Vendor\BBCode\Tokenizer;

use App\Vendor\BBCode\BBCode;
use App\Vendor\BBCode\Tag\AbstractTagType;

/**
 * "Tokenization is the process of demarcating and possibly classifying
 * sections of a string of input characters" (Source: Wikipedia)
 * The tokenizer operates on the text and tries to split it into parts.
 * The tokenizer is not very smart, it does not really care for grammar.
 */
class Tokenizer
{

    /**
     * Tokenize the text. Returns an array with the tokens in the order
     * in which they occur in the text.
     * Note: There can be more than one subsequent plain text tokens
     *
     * Known limitations: Everything in front of a "=" belongs to the tag name
     * (including spaces), and the text of a tag that is never closed with "]"
     * ends up in a plain text token without its "[".
     *
     * @param string $text      Render the passed BBCode string
     * @param bool   $escape    Escape HTML entities? (Only "<" and ">"!)
     * @param bool   $keepLines Keep line breaks by creating line break tokens for them?
     * @return Token[]
     */
    public function tokenize($text, $escape = true, $keepLines = true)
    {
        $tokens = [];
        $length = mb_strlen($text);
        $value = '';
        $textStartPos = 0; // Position of the first char of the plain text collected in $value
        $insideTag = false; // Means: The current position is between "[" and "]" (=a tag definition)
        $insideName = false; // In a tag "[code]", "code" is the name of the tag
        $insideString = false; // Properties of tags can be written as string with " at the start & end
        $noParse = false; // If true, do not parse BBCode inside this tag
        $tagName = ''; // Name of the current tag
        $tagProperty = ''; // Property value of the current tag
        $tagOpening = null; // True/false + null = undefined
        $tagStartPos = 0;

        // Loop over each character of the text
        for ($pos = 0; $pos < $length; $pos++) {
            $char = mb_substr($text, $pos, 1);

            if ($keepLines) {
                if ($char === "\n") {
                    if (! $insideTag) {
                        // The text in front of the line break has to become a token first,
                        // otherwise the line break would be rendered before that text
                        if ($value !== '') {
                            $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $textStartPos);
                            $value = '';
                        }

                        // The line break is represented by its own token, so it must not
                        // be added to the plain text as well
                        $tokens[] = new Token($char, Token::TYPE_LINEBREAK, $pos);
                        continue;
                    }

                    // Inside of a tag definition the char is still handled as part of the tag below
                    $tokens[] = new Token($char, Token::TYPE_LINEBREAK, $pos);
                }

                // Ignore \r
                if ($char === "\r") {
                    continue;
                }
            }

            // Escape HTML chars "<" and ">"
            if ($escape && ($char === '<' || $char === '>')) {
                if (! $insideTag && $value === '') {
                    $textStartPos = $pos;
                }
                $value .= htmlspecialchars($char);
                continue;
            }

            if (! $insideTag) {
                if ($char === '[') {
                    // Since a tag starts, plain text may end and we have to create a token for it
                    if ($value !== '') {
                        $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $textStartPos);
                        $value = '';
                    }

                    $insideTag = true;
                    $insideName = true;
                    $tagOpening = true;
                    $tagStartPos = $pos;
                    $tagName = '';
                    // Every tag starts without a property - do not inherit it from the previous tag
                    $tagProperty = '';
                } else {
                    // This is plain text
                    if ($value === '') {
                        $textStartPos = $pos;
                    }
                    $value .= $char;
                }
                continue;
            }

            // From here on we are inside of a tag definition

            // A quote starts or ends a string
            if ($char === '"') {
                $insideString = ! $insideString;
                continue;
            }

            // "]" closes a tag (if it is not used in a string)
            if ($char === ']' && ! $insideString) {
                if (! $noParse || (! $tagOpening && $this->checkNoParse($value))) {
                    $tokenType = $tagOpening ? Token::TYPE_TAG_OPENING : Token::TYPE_TAG_CLOSING;
                    $tokens[] = new Token($tagName, $tokenType, $tagStartPos, $tagProperty);
                } else {
                    $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $tagStartPos);
                }

                $noParse = $this->checkNoParse($value);

                $tagName = '';
                $value = '';
                $insideTag = false;
                $insideName = false;
                continue;
            }

            if ($insideName && ! $insideString) {
                if ($char === '/') {
                    // This makes the current tag a closing tag
                    $tagOpening = false;
                } elseif ($char === '=') {
                    // This means a property starts
                    $insideName = false;
                } elseif ($char === '[') {
                    // Invalid tag - ignore it and start again at this position
                    $value = '';
                    $tagName = '';
                    $tagOpening = true;
                    $tagStartPos = $pos;
                } else {
                    $lowerChar = mb_strtolower($char);
                    $value .= $lowerChar;
                    $tagName .= $lowerChar;
                }
            } else {
                // If we are not inside the name we are inside a property
                $tagProperty .= $char;
            }
        }

        // If the text ends with plain text we have to create the final plain text token now.
        // If the text ends inside of a tag, $value holds what was collected of that tag.
        if ($value !== '') {
            $tokens[] = new Token($value, Token::TYPE_PLAIN_TEXT, $insideTag ? $tagStartPos : $textStartPos);
        }

        return $tokens;
    }

    /**
     * Check if a tag is a tag that forbids parsing of its inner content.
     * Currently this always returns false, so no tag disables the parsing of its content.
     *
     * @param string $tagName
     * @return bool
     */
    protected function checkNoParse($tagName)
    {
        // We do not want to throw any exceptions so we just return false
        return false;
    }

}

Chargement…
Annuler
Enregistrer