From 74e591054877e0abfe3a9f31211011009ec34103 Mon Sep 17 00:00:00 2001 From: williamy2k <44499245+williamy2k@users.noreply.github.com> Date: Mon, 5 Sep 2022 21:52:00 +0100 Subject: [PATCH] Added image_url of the first listing image to the output table. --- rightmove_webscraper/scraper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rightmove_webscraper/scraper.py b/rightmove_webscraper/scraper.py index 12acc0b..e8d2d1a 100644 --- a/rightmove_webscraper/scraper.py +++ b/rightmove_webscraper/scraper.py @@ -172,6 +172,7 @@ def _get_page(self, request_content: str, get_floorplans: bool = False): xp_agent_urls = """//div[@class="propertyCard-contactsItem"]\ //div[@class="propertyCard-branchLogo"]\ //a[@class="propertyCard-branchLogo-link"]/@href""" + xp_image_urls = """//div[@class="propertyCard-img "]//img/@src""" # Create data lists from xpaths: price_pcm = tree.xpath(xp_prices) @@ -180,6 +181,7 @@ def _get_page(self, request_content: str, get_floorplans: bool = False): base = "http://www.rightmove.co.uk" weblinks = [f"{base}{tree.xpath(xp_weblinks)[w]}" for w in range(len(tree.xpath(xp_weblinks)))] agent_urls = [f"{base}{tree.xpath(xp_agent_urls)[a]}" for a in range(len(tree.xpath(xp_agent_urls)))] + image_urls = tree.xpath(xp_image_urls) # Optionally get floorplan links from property urls (longer runtime): floorplan_urls = list() if get_floorplans else np.nan @@ -197,11 +199,11 @@ def _get_page(self, request_content: str, get_floorplans: bool = False): floorplan_urls.append(np.nan) # Store the data in a Pandas DataFrame: - data = [price_pcm, titles, addresses, weblinks, agent_urls] + data = [price_pcm, titles, addresses, weblinks, agent_urls, image_urls] data = data + [floorplan_urls] if get_floorplans else data temp_df = pd.DataFrame(data) temp_df = temp_df.transpose() - columns = ["price", "type", "address", "url", "agent_url"] + columns = ["price", "type", "address", "url", "agent_url", "image_url"] columns = columns + ["floorplan_url"] if get_floorplans else columns temp_df.columns = columns