-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
116 lines (89 loc) · 3.19 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from daftlistings import Daft
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime, Float, Boolean
from sqlalchemy.orm import sessionmaker
from util import post_listing_to_slack
from slackclient import SlackClient
import time
import settings
import sys
# SQLAlchemy engine for the SQLite database file listings.db; echo=False keeps
# SQL statement logging switched off.
engine = create_engine('sqlite:///listings.db', echo=False)
# Declarative base class that the Daft_Listing model inherits from.
Base = declarative_base()
class Daft_Listing(Base):
    """
    A table to store data on Daft listings.

    Each instance is one row of the ``listings`` table. Rows are created by
    ``scrape_area`` for listings whose link is not stored yet.
    """
    __tablename__ = 'listings'
    # Integer primary key of the row.
    id = Column(Integer, primary_key=True)
    # Link of the listing; declared unique, and the value scrape_area looks up
    # to decide whether a listing has already been stored.
    link = Column(String, unique=True)
    # Filled by scrape_area from listing.get_posted_since().
    created = Column(DateTime)
    # Filled by scrape_area from listing.get_dwelling_type().
    building_type = Column(String)
    # Filled by scrape_area from listing.get_formalised_address().
    address = Column(String)
    # Filled by scrape_area from listing.get_price(); kept as text, not a number.
    price = Column(String)
    # Filled by scrape_area from listing.get_num_bedrooms().
    bedrooms = Column(Integer)
    # Filled by scrape_area from listing.get_num_bathrooms().
    bathrooms = Column(Integer)
    # Filled by scrape_area from listing.get_town().
    area = Column(String)
# Emit the table definitions registered on Base (the 'listings' table) to the
# database behind the engine. This runs at import time.
Base.metadata.create_all(engine)
# Session factory bound to the engine, and the single module-level session
# that scrape_area uses for its queries and commits.
Session = sessionmaker(bind=engine)
session = Session()
def scrape_area(area):
    """
    Scrapes Daft for a certain area and returns the listings not stored before.

    Result pages are requested one after another, advancing the offset by 10
    per request, filtered by the county, dwelling type, sale type and price
    bounds read from ``settings``. Every listing whose link is not yet in the
    ``listings`` table is saved there and added to the returned list. Paging
    stops at the first request that yields no listings.

    :param area: Value passed to ``Daft.get_listings`` as the ``area`` filter.
    :return: A list with one ``[address, price, bedrooms, link]`` entry per
        newly stored listing, accumulated over all result pages.
    """
    d = Daft()
    offset = 0
    # The accumulator must live outside the paging loop: resetting it per page
    # would throw away earlier pages (and the last, empty page would leave it
    # empty on return).
    results = []
    while True:
        daft_houses = d.get_listings(
            county=settings.COUNTY,
            area=area,
            offset=offset,
            listing_type=settings.dwelling_type,
            sale_type=settings.rent_or_sale,
            max_price=settings.MAX_PRICE,
            min_price=settings.MIN_PRICE
        )
        # An empty (or missing) page means there is nothing further to fetch.
        if not daft_houses:
            break
        for listing in daft_houses:
            link = listing.get_daft_link()
            known = session.query(Daft_Listing).filter_by(link=link).first()
            # Don't store or report the listing if it already exists.
            if known is not None:
                continue
            # Read each value once and reuse it for both the row and the result.
            address = listing.get_formalised_address()
            price = listing.get_price()
            bedrooms = listing.get_num_bedrooms()
            # Save the listing so we don't grab it again.
            session.add(Daft_Listing(
                link=link,
                created=listing.get_posted_since(),
                building_type=listing.get_dwelling_type(),
                address=address,
                price=price,
                bedrooms=bedrooms,
                bathrooms=listing.get_num_bathrooms(),
                area=listing.get_town()
            ))
            session.commit()
            # The stored row keeps the price as returned; only the value handed
            # back to the caller is UTF-8 encoded.
            # NOTE(review): on Python 3 this yields bytes - confirm that is
            # what post_listing_to_slack expects.
            if price is not None:
                price = price.encode('utf-8')
            results.append([address, price, bedrooms, link])
        offset += 10
    return results
def do_scrape():
    """
    Scrape every area listed in the settings and post each result to Slack.

    Prints a timestamped count of the gathered results before posting.
    """
    # The Slack client is built up front, before any scraping starts.
    slack = SlackClient(settings.SLACK_TOKEN)

    # Gather the results of all configured areas into one flat list.
    gathered = []
    for area in settings.AREAS:
        gathered.extend(scrape_area(area))

    print("{}: Got {} results".format(time.ctime(), len(gathered)))

    # Send the results to Slack one at a time, in the order they were found.
    for entry in gathered:
        post_listing_to_slack(slack, entry)