forked from scrapfly/scrapfly-scrapers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run.py
33 lines (24 loc) · 2.09 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
"""
This example run script shows how to run the Zillow scraper defined in ./zillow.py
It scrapes hotel data and saves it to ./results/
To run this script set the env variable $SCRAPFLY_KEY with your scrapfly API key:
$ export $SCRAPFLY_KEY="your key from https://scrapfly.io/dashboard"
"""
import asyncio
import json
from pathlib import Path
import zillow
output = Path(__file__).parent / "results"
output.mkdir(exist_ok=True)
async def run():
# enable scrapfly cache for basic use
zillow.BASE_CONFIG["cache"] = True
print("running Zillow scrape and saving results to ./results directory")
url = "https://www.zillow.com/san-francisco-ca/?searchQueryState=%7B%22usersSearchTerm%22%3A%22Nebraska%22%2C%22mapBounds%22%3A%7B%22north%22%3A37.890669225201904%2C%22east%22%3A-122.26750460986328%2C%22south%22%3A37.659734343010626%2C%22west%22%3A-122.59915439013672%7D%2C%22isMapVisible%22%3Atrue%2C%22filterState%22%3A%7B%22sort%22%3A%7B%22value%22%3A%22days%22%7D%2C%22ah%22%3A%7B%22value%22%3Atrue%7D%2C%22sche%22%3A%7B%22value%22%3Afalse%7D%2C%22schm%22%3A%7B%22value%22%3Afalse%7D%2C%22schh%22%3A%7B%22value%22%3Afalse%7D%2C%22schp%22%3A%7B%22value%22%3Afalse%7D%2C%22schr%22%3A%7B%22value%22%3Afalse%7D%2C%22schc%22%3A%7B%22value%22%3Afalse%7D%2C%22schu%22%3A%7B%22value%22%3Afalse%7D%2C%22apco%22%3A%7B%22value%22%3Afalse%7D%2C%22apa%22%3A%7B%22value%22%3Afalse%7D%2C%22con%22%3A%7B%22value%22%3Afalse%7D%2C%22tow%22%3A%7B%22value%22%3Afalse%7D%7D%2C%22isListVisible%22%3Atrue%2C%22mapZoom%22%3A12%2C%22schoolId%22%3Anull%2C%22regionSelection%22%3A%5B%7B%22regionId%22%3A20330%2C%22regionType%22%3A6%7D%5D%2C%22pagination%22%3A%7B%7D%7D"
result_location = await zillow.scrape_search(url)
output.joinpath("search.json").write_text(json.dumps(result_location, indent=2, ensure_ascii=False))
url = "https://www.zillow.com/homedetails/661-Lakeview-Ave-San-Francisco-CA-94112/15192198_zpid/"
result_property = await zillow.scrape_properties([url,])
output.joinpath("property.json").write_text(json.dumps(result_property[0], indent=2, ensure_ascii=False))
if __name__ == "__main__":
asyncio.run(run())