forked from veltman/statelympics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstate-flags.py
54 lines (39 loc) · 951 Bytes
/
state-flags.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import requests
from bs4 import BeautifulSoup
import re
import json
# Build a JSON mapping of state name -> {abbreviation, flag image URL, zeroed
# medal tally} from the saved HTML page "flagtable.html" and the lookup file
# "abbreviations.json", then print that mapping to stdout.

# NOTE(review): no parser is named here, so Beautiful Soup picks whichever one
# is installed; pin one explicitly once the expected parse has been confirmed.
with open("flagtable.html") as flag_page:
    soup = BeautifulSoup(flag_page.read())

# abbreviations.json is read as a list of records carrying "State" and
# "Abbreviation" keys; index it by state name for direct lookup.
with open("abbreviations.json") as abbreviations_file:
    abbreviations = json.load(abbreviations_file)
postalCodes = {a["State"]: a["Abbreviation"] for a in abbreviations}

states = {}
innerTables = soup.find_all("table")

for table in innerTables:
    # Only innermost tables are processed; a table that contains another
    # table is skipped.
    if table.find_all("table"):
        continue

    # A table without an image (or with an <img> lacking src) cannot
    # contribute a flag entry, so skip it instead of crashing.
    img_tag = table.find("img")
    img = img_tag.get("src") if img_tag is not None else None
    if not img:
        continue
    # Sources that do not already start with "http" get the scheme prepended
    # (presumably protocol-relative "//host/..." URLs - verify against the page).
    if not img.startswith("http"):
        img = "http:" + img

    cells = table.find_all("td")
    if not cells:
        continue
    lastCell = cells[-1]
    # Tables whose last cell mentions "reverse" are skipped entirely.
    if "reverse" in lastCell.text:
        continue

    for link in lastCell.find_all("a"):
        # Skip the generic "Flag" link and any link whose href contains
        # "cite"; anchors without an href are treated as having an empty one.
        if link.text == "Flag":
            continue
        if "cite" in link.get("href", ""):
            continue

        name = link.text.strip()
        # NOTE(review): this line goes to stdout ahead of the JSON dump below.
        print(name)
        # An unknown name raises KeyError here, which surfaces mismatches
        # between the page and abbreviations.json.
        states[name] = {
            "abbreviation": postalCodes[name],
            "img": img,
            "medals": {
                "total": 0,
                "gold": 0,
                "silver": 0,
                "bronze": 0,
            },
        }

print(json.dumps(states))