From 8dfdbe8f4d174e029fd0556c8410725c46ce0031 Mon Sep 17 00:00:00 2001 From: Mauro Cassani Date: Tue, 8 Jun 2021 17:24:11 +0200 Subject: [PATCH] Fix double lt bug (HTMLParser) --- src/Filters/Html/HtmlParser.php | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/src/Filters/Html/HtmlParser.php b/src/Filters/Html/HtmlParser.php index 964bc92..e0e5d53 100644 --- a/src/Filters/Html/HtmlParser.php +++ b/src/Filters/Html/HtmlParser.php @@ -71,15 +71,9 @@ public function transform( $segment ) { $originalSplit = preg_split( '//u', $segment, -1, PREG_SPLIT_NO_EMPTY ); -// $strippedSplit = preg_split( '//u', str_replace( [ "<", ">" ], "", $segment ), -1, PREG_SPLIT_NO_EMPTY ); -// if ( $originalSplit == $strippedSplit ) { -// return $segment; -// } - $state = static::STATE_PLAINTEXT; $html_buffer = ''; $plain_text_buffer = ''; - $depth = 0; $in_quote_char = ''; $output = ''; @@ -106,8 +100,10 @@ public function transform( $segment ) { break; } - // we're seeing a nested '<' - $depth++; + // if we found a second less than symbol the first one IS NOT a tag, + // treat the html_buffer as plain text and attach to the output + $output .= $this->_fixWrongBuffer( $html_buffer ); + $html_buffer = $char; break; case '>': @@ -116,13 +112,6 @@ public function transform( $segment ) { break; } - // something like this is happening: '<<>>' - if ( $depth ) { - $depth--; - - break; - } - if ( in_array( substr( $html_buffer, 0, 6 ), [ '