diff --git a/Classes/Command/CheckLinksCommandController.php b/Classes/Command/CheckLinksCommandController.php index d3e3cec..117ce04 100644 --- a/Classes/Command/CheckLinksCommandController.php +++ b/Classes/Command/CheckLinksCommandController.php @@ -12,14 +12,9 @@ use CodeQ\LinkChecker\Infrastructure\CrawlNonExcludedUrls; use CodeQ\LinkChecker\Domain\Notification\NotificationServiceInterface; use CodeQ\LinkChecker\Infrastructure\OriginUrlException; -use GuzzleHttp\Exception\ConnectException; -use GuzzleHttp\HandlerStack; -use GuzzleHttp\Middleware; -use GuzzleHttp\Psr7\Request; -use GuzzleHttp\Psr7\Response; +use CodeQ\LinkChecker\Infrastructure\WebCrawlerFactory; use GuzzleHttp\Psr7\ServerRequest; use GuzzleHttp\Psr7\Uri; -use GuzzleHttp\RequestOptions; use Neos\ContentRepository\Domain\Service\ContextFactoryInterface; use Neos\Flow\Annotations as Flow; use Neos\Flow\Cli\CommandController; @@ -30,9 +25,6 @@ use Neos\Neos\Domain\Service\ContentContext; use Neos\Utility\ObjectAccess; use Psr\Http\Message\UriInterface; -use Spatie\Crawler\Crawler; -use Spatie\Crawler\CrawlObservers\CrawlObserver; -use Spatie\Crawler\CrawlProfiles\CrawlProfile; /** * @Flow\Scope("singleton") @@ -69,6 +61,12 @@ class CheckLinksCommandController extends CommandController */ protected $uriFactory; + /** + * @var WebCrawlerFactory + * @Flow\Inject + */ + protected $webCrawlerFactory; + /** * @Flow\Inject * @var ResultItemRepositoryInterface @@ -76,10 +74,10 @@ class CheckLinksCommandController extends CommandController protected $resultItemRepository; /** - * @var string - * @Flow\InjectConfiguration(path="notifications.service") + * @Flow\InjectConfiguration(path="notifications") + * @var array */ - protected $notificationServiceClass; + protected $notificationSettings; /** * @var BaseUriProvider @@ -87,16 +85,6 @@ class CheckLinksCommandController extends CommandController */ protected $baseUriProvider; - protected array $settings; - - /** - * Inject the settings - */ - public function injectSettings(array $settings): void - { - $this->settings = $settings; - } - /** * Clear all stored errors * @@ -200,7 +188,7 @@ private function crawlExternalCommandImplementation(array $domainsToCrawl, int & $crawlProfile = new CrawlNonExcludedUrls(); $crawlObserver = new LogAndPersistResultCrawlObserver(); - $crawler = self::createCrawler($this->settings['clientOptions'] ?? [], $crawlProfile, $crawlObserver); + $crawler = $this->webCrawlerFactory->createCrawler($crawlProfile, $crawlObserver); foreach ($domainsToCrawl as $domainToCrawl) { @@ -235,87 +223,6 @@ private function ensureDomainsNotEmpty(array $domains): void } } - private static function createCrawler(array $settings, CrawlProfile $crawlProfile, CrawlObserver $crawlObserver): Crawler - { - // If no settings are configured we just set timeout and allow_redirect. - $clientOptions = [ - RequestOptions::TIMEOUT => 100, - RequestOptions::ALLOW_REDIRECTS => false, - ]; - - if (isset($settings['cookies']) && is_bool($settings['cookies'])) { - $clientOptions[RequestOptions::COOKIES] = $settings['cookies']; - } - - if (isset($settings['connectionTimeout']) && is_numeric($settings['connectionTimeout'])) { - $clientOptions[RequestOptions::CONNECT_TIMEOUT] = (int)$settings['connectionTimeout']; - } - - if (isset($settings['timeout']) && is_numeric($settings['timeout'])) { - $clientOptions[RequestOptions::TIMEOUT] = (int)$settings['timeout']; - } - - if (isset($settings['allowRedirects']) && is_bool($settings['allowRedirects'])) { - $clientOptions[RequestOptions::ALLOW_REDIRECTS] = $settings['allowRedirects']; - } - - if ( - isset($settings['auth']) && is_array($settings['auth']) - && count($settings['auth']) > 1 - ) { - $clientOptions[RequestOptions::AUTH] = $settings['auth']; - } - - $handler = HandlerStack::create(); - - if (isset($settings['retryAttempts']) && is_numeric($settings['retryAttempts']) && $settings['retryAttempts'] >= 0) { - - $retryAttempts = (int)$settings['retryAttempts']; - - $handler->push( - Middleware::retry( - function ( - $retries, - Request $request, - Response $response = null, - \Exception $exception = null - ) use($retryAttempts) { - if ($retries >= $retryAttempts) { - return false; - } - if ($exception instanceof ConnectException) { - return true; - } - return false; - }, - function ( - $numberOfRetries - ) { - return 1000 * $numberOfRetries; - } - ) - ); - } - - $clientOptions["handler"] = $handler; - - $crawler = Crawler::create($clientOptions) - ->setCrawlObserver($crawlObserver) - ->setCrawlProfile($crawlProfile); - - $concurrency = 10; - if (isset($settings['concurrency']) && (int)$settings['concurrency'] >= 0) { - $concurrency = (int)$settings['concurrency']; - } - $crawler->setConcurrency($concurrency); - - if (!isset($settings['ignoreRobots']) || $settings['ignoreRobots']) { - $crawler->ignoreRobots(); - } - - return $crawler; - } - private function createLinkCheckerDashboardUriFromStuff(array $domains): UriInterface { $firstDomain = $domains[0]; @@ -353,11 +260,11 @@ private function sendNotificationIfNecessary(int $errorCount, UriInterface $link return; } - if (!$this->settings['notifications']['enabled']) { + if (!$this->notificationSettings['enabled']) { return; } - $notificationServiceClass = trim($this->notificationServiceClass); + $notificationServiceClass = trim($this->notificationSettings['service']); if ($notificationServiceClass === '') { $errorMessage = 'No notification service has been configured, but the notification handling is enabled'; throw new \InvalidArgumentException($errorMessage, 1540201992); @@ -372,7 +279,7 @@ private function sendNotificationIfNecessary(int $errorCount, UriInterface $link ); } $notificationService->sendNotification( - $this->settings['notifications']['subject'] ?? '', + $this->notificationSettings['subject'] ?? '', [ 'errorCount' => $errorCount, 'linkCheckerDashboardUri' => $linkCheckerDashboardUri diff --git a/Classes/Infrastructure/EmailService.php b/Classes/Infrastructure/EmailService.php index e9559c5..b659ad1 100644 --- a/Classes/Infrastructure/EmailService.php +++ b/Classes/Infrastructure/EmailService.php @@ -2,17 +2,12 @@ namespace CodeQ\LinkChecker\Infrastructure; -use CodeQ\LinkChecker\Domain\Model\ResultItem; use CodeQ\LinkChecker\Domain\Notification\NotificationServiceInterface; -use League\Csv\CannotInsertRecord; -use League\Csv\Exception; use Neos\Flow\Annotations as Flow; use Neos\Flow\Configuration\ConfigurationManager; use Neos\FluidAdaptor\View\StandaloneView; use Neos\SwiftMailer\Message; -use League\Csv\Writer; use Psr\Log\LoggerInterface; -use Swift_Attachment; /** * @Flow\Scope("singleton") diff --git a/Classes/Infrastructure/WebCrawlerFactory.php b/Classes/Infrastructure/WebCrawlerFactory.php new file mode 100644 index 0000000..3ea4c5a --- /dev/null +++ b/Classes/Infrastructure/WebCrawlerFactory.php @@ -0,0 +1,110 @@ +settings are configured we just set timeout and allow_redirect. + $clientOptions = [ + RequestOptions::TIMEOUT => 100, + RequestOptions::ALLOW_REDIRECTS => false, + ]; + + if (isset($this->settings['cookies']) && is_bool($this->settings['cookies'])) { + $clientOptions[RequestOptions::COOKIES] = $this->settings['cookies']; + } + + if (isset($this->settings['connectionTimeout']) && is_numeric($this->settings['connectionTimeout'])) { + $clientOptions[RequestOptions::CONNECT_TIMEOUT] = (int)$this->settings['connectionTimeout']; + } + + if (isset($this->settings['timeout']) && is_numeric($this->settings['timeout'])) { + $clientOptions[RequestOptions::TIMEOUT] = (int)$this->settings['timeout']; + } + + if (isset($this->settings['allowRedirects']) && is_bool($this->settings['allowRedirects'])) { + $clientOptions[RequestOptions::ALLOW_REDIRECTS] = $this->settings['allowRedirects']; + } + + if ( + isset($this->settings['auth']) && is_array($this->settings['auth']) + && count($this->settings['auth']) > 1 + ) { + $clientOptions[RequestOptions::AUTH] = $this->settings['auth']; + } + + $handler = HandlerStack::create(); + + if (isset($this->settings['retryAttempts']) && is_numeric($this->settings['retryAttempts']) && $this->settings['retryAttempts'] >= 0) { + $handler->push( + self::createRetryOnConnectionTimedOutMiddleware((int)$this->settings['retryAttempts']) + ); + } + + $clientOptions["handler"] = $handler; + + $crawler = new Crawler(new Client($clientOptions)); + + $crawler->setCrawlObserver($crawlObserver); + $crawler->setCrawlProfile($crawlProfile); + + $concurrency = 10; + if (isset($this->settings['concurrency']) && (int)$this->settings['concurrency'] >= 0) { + $concurrency = (int)$this->settings['concurrency']; + } + $crawler->setConcurrency($concurrency); + + if (!isset($this->settings['ignoreRobots']) || $this->settings['ignoreRobots']) { + $crawler->ignoreRobots(); + } + + return $crawler; + } + + private static function createRetryOnConnectionTimedOutMiddleware(int $retryAttempts) { + return Middleware::retry( + function ( + $retries, + Request $request, + Response $response = null, + \Exception $exception = null + ) use($retryAttempts) { + if ($retries >= $retryAttempts) { + return false; + } + if ($exception instanceof ConnectException) { + return true; + } + return false; + }, + function ( + $numberOfRetries + ) { + return 1000 * $numberOfRetries; + } + ); + } +}