-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[TASK] Split up ContentParser to multiple classes and interfaces
- Loading branch information
Showing
12 changed files
with
248 additions
and
132 deletions.
There are no files selected for viewing
19 changes: 19 additions & 0 deletions
19
Classes/Service/Preview/ContentExtractors/BaseUrlParser.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Assembles a base URL ("scheme://host[:port]") from already-parsed URL components.
 */
class BaseUrlParser implements BaseUrlParserInterface
{
    /**
     * Builds the base URL from the given URL components.
     *
     * @param mixed $urlParts Array that may carry 'scheme', 'host' and 'port' keys;
     *                        any non-array value is treated as unusable input.
     * @return string The assembled base URL, or '://' when $urlParts is not an array.
     */
    public function getBaseUrl(mixed $urlParts): string
    {
        if (!is_array($urlParts)) {
            return '://';
        }

        // A missing scheme yields a protocol-relative URL ("//host").
        $schemePrefix = isset($urlParts['scheme']) ? $urlParts['scheme'] . ':' : '';
        $baseUrl = $schemePrefix . '//' . ($urlParts['host'] ?? '');

        // The port is only appended when it is present and truthy.
        if ($urlParts['port'] ?? false) {
            $baseUrl .= ':' . $urlParts['port'];
        }

        return $baseUrl;
    }
}
10 changes: 10 additions & 0 deletions
10
Classes/Service/Preview/ContentExtractors/BaseUrlParserInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Contract for turning parsed URL components into a base URL string.
 */
interface BaseUrlParserInterface | ||
{ | ||
/**
 * Builds a base URL from URL components.
 *
 * @param mixed $urlParts URL components; declared as mixed, so implementations
 *                        must cope with non-array input themselves.
 * @return string The base URL.
 */
public function getBaseUrl(mixed $urlParts): string; | ||
} |
52 changes: 52 additions & 0 deletions
52
Classes/Service/Preview/ContentExtractors/BodyProcessor.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Reduces a rendered HTML document to the body markup used for content analysis.
 */
class BodyProcessor implements BodyProcessorInterface
{
    /**
     * Extracts the body markup from a full HTML document.
     *
     * When the body contains TYPO3SEARCH_begin/TYPO3SEARCH_end comment markers,
     * only the marked sections (concatenated) are kept. The result is then
     * cleaned by prepareBody().
     *
     * @param string $content Complete HTML document.
     * @return string Cleaned body markup; empty string when no <body> element is found.
     */
    public function getBody(string $content): string
    {
        if (!preg_match("/<body[^>]*>(.*)<\/body>/is", $content, $bodyMatch)) {
            return $this->prepareBody('');
        }

        $markup = $bodyMatch[1];

        preg_match_all(
            '/<!--\s*?TYPO3SEARCH_begin\s*?-->.*?<!--\s*?TYPO3SEARCH_end\s*?-->/mis',
            $markup,
            $markedSections
        );

        // Restrict the body to the explicitly marked sections, if there are any.
        $sections = $markedSections[0];
        if (is_array($sections) && $sections !== []) {
            $markup = implode('', $sections);
        }

        return $this->prepareBody($markup);
    }

    /**
     * Cleans body markup: drops script/noscript elements including their content,
     * collapses whitespace and strips every tag except headings h1-h5, p, a and img.
     *
     * @param string $body Raw body markup.
     * @return string Trimmed, cleaned markup.
     */
    protected function prepareBody(string $body): string
    {
        $withoutScripts = $this->stripTagsContent($body, '<script><noscript>');
        $collapsed = preg_replace(['/\s?\n\s?/', '/\s{2,}/'], [' ', ' '], $withoutScripts);

        return trim(strip_tags((string)$collapsed, '<h1><h2><h3><h4><h5><p><a><img>'));
    }

    /**
     * Removes the listed elements together with everything between their
     * opening and closing tags.
     *
     * @param string $text Markup to clean.
     * @param string $tags Tags to remove, written like '<script><noscript>'.
     * @return string Markup without the listed elements; $text unchanged when no tag is listed.
     */
    protected function stripTagsContent(string $text, string $tags = ''): string
    {
        preg_match_all('/<(.+?)[\s]*\/?[\s]*>/si', trim($tags), $tagMatches);
        $tagNames = array_unique($tagMatches[1]);

        if ($tagNames === []) {
            return $text;
        }

        // One alternation over all tag names; \1 pairs each opening tag with its closing tag.
        $pattern = '@<(' . implode('|', $tagNames) . ')\b.*?>.*?</\1>@si';

        return (string)preg_replace($pattern, '', $text);
    }
}
10 changes: 10 additions & 0 deletions
10
Classes/Service/Preview/ContentExtractors/BodyProcessorInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Contract for extracting the body from page content.
 */
interface BodyProcessorInterface | ||
{ | ||
/**
 * Returns the body for the given content.
 *
 * @param string $content Page content to extract the body from.
 * @return string The extracted body.
 */
public function getBody(string $content): string; | ||
} |
48 changes: 48 additions & 0 deletions
48
Classes/Service/Preview/ContentExtractors/ContentMetadataExtractor.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Reads title, meta description and language from a rendered HTML document.
 */
class ContentMetadataExtractor implements ContentMetadataExtractorInterface
{
    /**
     * Returns the text of the first <title> element.
     *
     * @param string $content Complete HTML document.
     * @return string Entity-decoded title without tags; empty string when there is no title.
     */
    public function getTitle(string $content): string
    {
        $title = '';

        if (preg_match("/<title[^>]*>(.*?)<\/title>/is", $content, $matchesTitle)) {
            $title = $matchesTitle[1];
        }

        return strip_tags(html_entity_decode($title));
    }

    /**
     * Returns the content of the description meta tag.
     *
     * The name attribute must precede the content attribute. Both attributes
     * may be wrapped in double or single quotes.
     *
     * @param string $content Complete HTML document.
     * @return string Entity-decoded description without tags; empty string when not found.
     */
    public function getDescription(string $content): string
    {
        $metaDescription = '';

        // (?| ... ) is a branch-reset group: whichever quote style matches,
        // the attribute value always lands in capture group 1.
        $descriptionFound = preg_match(
            '/<meta[^>]*name=["\']description["\'][^>]*content=(?|"([^"]*)"|\'([^\']*)\')[^>]*>/i',
            $content,
            $matchesDescription
        );

        if ($descriptionFound) {
            $metaDescription = $matchesDescription[1];
        }

        return strip_tags(html_entity_decode($metaDescription));
    }

    /**
     * Returns the primary language subtag of the <html lang="..."> attribute.
     *
     * @param string $content Complete HTML document.
     * @return string Language code such as 'en' or 'de'; 'en' when the attribute
     *                is missing or has no usable primary subtag.
     */
    public function getLocale(string $content): string
    {
        $locale = 'en';

        if (preg_match('/<html[^>]*lang="([a-z\-A-Z]*)"/is', $content, $matchesLocale)) {
            [$primarySubtag] = explode('-', $matchesLocale[1]);

            // lang="" or lang="-" carries no language; keep the default in that case.
            if ($primarySubtag !== '') {
                $locale = $primarySubtag;
            }
        }

        return $locale;
    }
}
12 changes: 12 additions & 0 deletions
12
Classes/Service/Preview/ContentExtractors/ContentMetadataExtractorInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Contract for reading title, description and locale from page content.
 */
interface ContentMetadataExtractorInterface | ||
{ | ||
/**
 * @param string $content Page content to inspect.
 * @return string The title.
 */
public function getTitle(string $content): string; | ||
/**
 * @param string $content Page content to inspect.
 * @return string The description.
 */
public function getDescription(string $content): string; | ||
/**
 * @param string $content Page content to inspect.
 * @return string The locale.
 */
public function getLocale(string $content): string; | ||
} |
22 changes: 22 additions & 0 deletions
22
Classes/Service/Preview/ContentExtractors/FaviconExtractor.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Determines the favicon URL of a rendered page.
 */
class FaviconExtractor implements FaviconExtractorInterface
{
    /**
     * Returns the favicon URL declared in the document, or "<baseUrl>/favicon.ico"
     * when no usable declaration is found.
     *
     * The resulting URL is probed with an HTTP request. An empty string is
     * returned when the server answers with status 404. An unreachable host
     * leaves the URL untouched (best effort).
     *
     * @param string $baseUrl Base URL of the site, e.g. "https://example.com".
     * @param string $content Complete HTML document.
     * @return string Favicon URL, or '' when the favicon does not exist.
     */
    public function getFaviconSrc(string $baseUrl, string $content): string
    {
        $faviconSrc = $baseUrl . '/favicon.ico';

        $favIconFound = preg_match('/<link rel=\"shortcut icon\" href=\"([^"]*)\"/i', $content, $matchesFavIcon);
        // An empty href carries no icon location; keep the conventional default then.
        if ($favIconFound && $matchesFavIcon[1] !== '') {
            $faviconSrc = $this->resolveFaviconUrl($baseUrl, $matchesFavIcon[1]);
        }

        // get_headers() emits a warning and returns false on connection errors;
        // that case is deliberately ignored so the preview never breaks on it.
        $favIconHeader = @get_headers($faviconSrc);
        $statusLine = is_array($favIconHeader) ? (string)($favIconHeader[0] ?? '') : '';

        // Match the status code only, independent of protocol version and reason
        // phrase ("HTTP/1.0 404 Not Found", "HTTP/2 404", ...).
        if (preg_match('#^HTTP/\S+\s+404\b#i', $statusLine) === 1) {
            return '';
        }

        return $faviconSrc;
    }

    /**
     * Turns the href of a favicon link into an absolute URL.
     *
     * @param string $baseUrl Base URL of the site.
     * @param string $href    Non-empty href as found in the document.
     * @return string Absolute favicon URL.
     */
    protected function resolveFaviconUrl(string $baseUrl, string $href): string
    {
        // Already absolute.
        if (str_contains($href, '://')) {
            return $href;
        }

        // Protocol-relative ("//cdn.example.com/icon.ico"): reuse the scheme of the site.
        if (str_starts_with($href, '//')) {
            $scheme = parse_url($baseUrl, PHP_URL_SCHEME);

            return (is_string($scheme) && $scheme !== '' ? $scheme . ':' : '') . $href;
        }

        // Site-relative: join with exactly one slash between host and path.
        return rtrim($baseUrl, '/') . '/' . ltrim($href, '/');
    }
}
10 changes: 10 additions & 0 deletions
10
Classes/Service/Preview/ContentExtractors/FaviconExtractorInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Contract for determining the favicon source of a page.
 */
interface FaviconExtractorInterface | ||
{ | ||
/**
 * Returns the favicon source for the given page content.
 *
 * @param string $baseUrl Base URL of the site the content belongs to.
 * @param string $content Page content to inspect.
 * @return string The favicon source.
 */
public function getFaviconSrc(string $baseUrl, string $content): string; | ||
} |
24 changes: 24 additions & 0 deletions
24
Classes/Service/Preview/ContentExtractors/TitleConfigurationExtractor.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Reads the title prefix/suffix configuration from the "x-yoast-title-config" meta tag.
 */
class TitleConfigurationExtractor implements TitleConfigurationExtractorInterface
{
    /**
     * Extracts the title configuration from the document.
     *
     * The meta tag value has the form "<prepend>|||<append>". A value without
     * the "|||" separator is treated as a prepend-only configuration.
     *
     * @param string $content Complete HTML document.
     * @return array{titlePrepend: string, titleAppend: string} Both entries are
     *         empty strings when the meta tag is missing.
     */
    public function getTitleConfiguration(string $content): array
    {
        $prepend = $append = '';

        $configFound = preg_match(
            '/<meta name=\"x-yoast-title-config\" value=\"([^"]*)\"/i',
            $content,
            $matchesTitleConfig
        );

        if ($configFound === 1) {
            // Pad to two elements so a value without "|||" cannot trigger an
            // undefined-key warning and leave $append as null.
            [$prepend, $append] = array_pad(explode('|||', (string)$matchesTitleConfig[1]), 2, '');
        }

        return [
            'titlePrepend' => $prepend,
            'titleAppend' => $append,
        ];
    }
}
10 changes: 10 additions & 0 deletions
10
Classes/Service/Preview/ContentExtractors/TitleConfigurationExtractorInterface.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
<?php | ||
|
||
declare(strict_types=1); | ||
|
||
namespace YoastSeoForTypo3\YoastSeo\Service\Preview\ContentExtractors; | ||
|
||
/**
 * Contract for reading the title configuration from page content.
 */
interface TitleConfigurationExtractorInterface | ||
{ | ||
/**
 * Returns the title configuration for the given page content.
 *
 * @param string $content Page content to inspect.
 * @return array{titlePrepend: string, titleAppend: string}
 */
public function getTitleConfiguration(string $content): array; | ||
} |
Oops, something went wrong.