From 7d579e17a3c6fa9092bc3f628b4b86d0021868ce Mon Sep 17 00:00:00 2001 From: kasp3r Date: Thu, 14 Apr 2016 14:02:04 +0300 Subject: [PATCH 1/3] * Updated guzzle and PHPUnit to the latest stable versions * Added getPictures method to return all images on a page --- README.md | 11 ++++++- composer.json | 6 ++-- src/LinkPreview/Model/Link.php | 24 +++++++++++++++ src/LinkPreview/Model/LinkInterface.php | 13 +++++++++ src/LinkPreview/Parser/GeneralParser.php | 12 ++++++-- src/LinkPreview/Reader/GeneralReader.php | 29 ++++++++++++++----- .../Tests/Reader/GeneralReaderTest.php | 29 +++++-------------- 7 files changed, 88 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 3e731b5..b539774 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A PHP library to easily get website information (title, description, image...) f ## Dependencies -* PHP >= 5.4 +* PHP >= 5.5 * Guzzle ## Installation @@ -48,6 +48,7 @@ foreach ($parsed as $parserName => $link) { echo $link->getTitle() . PHP_EOL; echo $link->getDescription() . PHP_EOL; echo $link->getImage() . PHP_EOL; + print_r($link->getPictures()); } ``` @@ -62,6 +63,14 @@ https://github.com/ GitHub · Build software better, together. GitHub is the best place to build software together. Over 10.1 million people use GitHub to share code. 
https://assets-cdn.github.com/images/modules/open_graph/github-octocat.png +Array +( + [0] => https://assets-cdn.github.com/images/modules/site/home-ill-build.png?sn + [1] => https://assets-cdn.github.com/images/modules/site/home-ill-work.png?sn + [2] => https://assets-cdn.github.com/images/modules/site/home-ill-projects.png?sn + [3] => https://assets-cdn.github.com/images/modules/site/home-ill-platform.png?sn + [4] => https://assets-cdn.github.com/images/modules/site/org_example_nasa.png?sn +) ``` ###Youtube example diff --git a/composer.json b/composer.json index 67670be..5db7ef8 100644 --- a/composer.json +++ b/composer.json @@ -13,11 +13,11 @@ } ], "require": { - "php": ">=5.3.0", - "guzzle/guzzle": "~3.8" + "php": ">=5.5", + "guzzlehttp/guzzle": "^6.2" }, "require-dev": { - "phpunit/phpunit": "3.7.*" + "phpunit/phpunit": "^5.3" }, "autoload": { "psr-0": { diff --git a/src/LinkPreview/Model/Link.php b/src/LinkPreview/Model/Link.php index 6b93f77..40f49db 100644 --- a/src/LinkPreview/Model/Link.php +++ b/src/LinkPreview/Model/Link.php @@ -23,6 +23,10 @@ class Link implements LinkInterface * @var string $image Url to image */ private $image; + /** + * @var array $pictures Urls to all images on a page + */ + private $pictures; /** * @var string $realUrl */ @@ -171,4 +175,24 @@ public function setUrl($url) return $this; } + + /** + * @param array $pictures + * @return $this + */ + public function setPictures($pictures) + { + $this->pictures = $pictures; + + return $this; + } + + /** + * Get Urls to all images on a page + * @return array + */ + public function getPictures() + { + return $this->pictures; + } } \ No newline at end of file diff --git a/src/LinkPreview/Model/LinkInterface.php b/src/LinkPreview/Model/LinkInterface.php index fe84a87..58a03fd 100644 --- a/src/LinkPreview/Model/LinkInterface.php +++ b/src/LinkPreview/Model/LinkInterface.php @@ -31,6 +31,12 @@ public function getDescription(); */ public function getImage(); + /** + * Get pictures urls + * 
@return array + */ + public function getPictures(); + /** * Get real url after all redirects * @return string @@ -77,6 +83,13 @@ public function setDescription($description); */ public function setImage($image); + /** + * Set pictures urls + * @param array $pictures + * @return $this + */ + public function setPictures($pictures); + /** * Set real url after all redirects * @param string $realUrl diff --git a/src/LinkPreview/Parser/GeneralParser.php b/src/LinkPreview/Parser/GeneralParser.php index fed5d38..0529dd0 100644 --- a/src/LinkPreview/Parser/GeneralParser.php +++ b/src/LinkPreview/Parser/GeneralParser.php @@ -42,7 +42,7 @@ class GeneralParser implements ParserInterface /** * @param ReaderInterface $reader - * @param LinkInterface $link + * @param LinkInterface $link */ public function __construct(ReaderInterface $reader = null, LinkInterface $link = null) { @@ -134,7 +134,8 @@ public function parseLink() $link->setTitle($htmlData['title']) ->setDescription($htmlData['description']) - ->setImage($htmlData['image']); + ->setImage($htmlData['image']) + ->setPictures($htmlData['pictures']); } elseif (!strncmp($link->getContentType(), 'image/', strlen('image/'))) { $link->setImage($link->getRealUrl()); } @@ -152,7 +153,8 @@ protected function parseHtml($html) $data = [ 'image' => '', 'title' => '', - 'description' => '' + 'description' => '', + 'pictures' => [], ]; libxml_use_internal_errors(true); @@ -200,6 +202,10 @@ protected function parseHtml($html) } } + foreach ($doc->getElementsByTagName('img') as $img) { + $data['pictures'][] = $img->getAttribute('src'); + } + return $data; } diff --git a/src/LinkPreview/Reader/GeneralReader.php b/src/LinkPreview/Reader/GeneralReader.php index dc90cc3..cf60e4d 100644 --- a/src/LinkPreview/Reader/GeneralReader.php +++ b/src/LinkPreview/Reader/GeneralReader.php @@ -2,7 +2,9 @@ namespace LinkPreview\Reader; -use Guzzle\Http\Client; +use GuzzleHttp\Client; +use GuzzleHttp\RequestOptions; +use GuzzleHttp\TransferStats; use 
LinkPreview\Model\LinkInterface; /** @@ -25,7 +27,7 @@ class GeneralReader implements ReaderInterface public function getClient() { if (!$this->client) { - $this->client = new Client(); + $this->client = new Client([RequestOptions::COOKIES => true]); } return $this->client; @@ -65,12 +67,25 @@ public function readLink() $link = $this->getLink(); $client = $this->getClient(); - $client->setBaseUrl($link->getUrl()); - $response = $client->get()->send(); + $response = $client->request( + 'GET', + $link->getUrl(), + [ + 'on_stats' => function (TransferStats $stats) use (&$effectiveUrl) { + $effectiveUrl = $stats->getEffectiveUri(); + } + ] + ); - $link->setContent($response->getBody(true)) - ->setContentType($response->getContentType()) - ->setRealUrl($response->getEffectiveUrl()); + $headerContentType = $response->getHeader('content-type'); + $contentType = ''; + if (is_array($headerContentType) && count($headerContentType) > 0) { + $contentType = current(explode(';', current($headerContentType))); + } + + $link->setContent((string)$response->getBody()) + ->setContentType($contentType) + ->setRealUrl($effectiveUrl); return $link; } diff --git a/tests/LinkPreview/Tests/Reader/GeneralReaderTest.php b/tests/LinkPreview/Tests/Reader/GeneralReaderTest.php index 76093b4..89db352 100644 --- a/tests/LinkPreview/Tests/Reader/GeneralReaderTest.php +++ b/tests/LinkPreview/Tests/Reader/GeneralReaderTest.php @@ -9,8 +9,8 @@ class GeneralReaderTest extends \PHPUnit_Framework_TestCase public function testReadLink() { $responseMock = $this->getMock( - 'Guzzle\Http\Message\Response', - ['getBody', 'getContentType', 'getEffectiveUrl'], + 'GuzzleHttp\Psr7\Response', + ['getBody', 'getHeader'], [], '', false @@ -19,27 +19,13 @@ public function testReadLink() ->method('getBody') ->will(self::returnValue('body')); $responseMock->expects(self::once()) - ->method('getContentType') - ->will(self::returnValue('text/html')); - $responseMock->expects(self::once()) - ->method('getEffectiveUrl') - 
->will(self::returnValue('http://github.com')); - - $requestMock = $this->getMock( - 'Guzzle\Http\Message\Request', - ['send'], - [], - '', - false - ); - $requestMock->expects(self::once()) - ->method('send') - ->will(self::returnValue($responseMock)); + ->method('getHeader') + ->will(self::returnValue(array('text/html; UTF-8'))); - $clientMock = $this->getMock('Guzzle\Http\Client'); + $clientMock = $this->getMock('GuzzleHttp\Client'); $clientMock->expects(self::once()) - ->method('get') - ->will(self::returnValue($requestMock)); + ->method('request') + ->will(self::returnValue($responseMock)); $linkMock = $this->getMock('LinkPreview\Model\Link', null); @@ -50,6 +36,5 @@ public function testReadLink() self::assertEquals('body', $link->getContent()); self::assertEquals('text/html', $link->getContentType()); - self::assertEquals('http://github.com', $link->getRealUrl()); } } From d59bbf30de6e7109ce251f6124f5772e76547762 Mon Sep 17 00:00:00 2001 From: kasp3r Date: Thu, 14 Apr 2016 14:09:55 +0300 Subject: [PATCH 2/3] * updated travis --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 00b8c32..0d39a32 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,9 @@ language: php php: - - 5.4 - 5.5 + - 5.6 + - 7 before_script: - composer install --dev \ No newline at end of file From eca77943d296cdc0defe987319bb84cde8a21f30 Mon Sep 17 00:00:00 2001 From: kasp3r Date: Thu, 14 Apr 2016 14:15:07 +0300 Subject: [PATCH 3/3] * updated travis --- .travis.yml | 1 - README.md | 2 +- composer.json | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 0d39a32..bd483c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: php php: - - 5.5 - 5.6 - 7 diff --git a/README.md b/README.md index b539774..e3e3d4e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ A PHP library to easily get website information (title, description, image...) 
f ## Dependencies -* PHP >= 5.5 +* PHP >= 5.6 (without phpunit it should work on 5.5) * Guzzle ## Installation diff --git a/composer.json b/composer.json index 5db7ef8..56e708a 100644 --- a/composer.json +++ b/composer.json @@ -13,7 +13,7 @@ } ], "require": { - "php": ">=5.5", + "php": ">=5.6", "guzzlehttp/guzzle": "^6.2" }, "require-dev": {