-
Notifications
You must be signed in to change notification settings - Fork 0
/
plant_bd.py
48 lines (44 loc) · 1.62 KB
/
plant_bd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import requests
import time
import re
# Browser-like request headers sent with every image download.
#
# Deliberately NOT included:
#   * 'Host' - Requests derives it from each URL; pinning it to a single
#     image host sends the wrong value to every other host.
#   * 'If-Modified-Since' / 'If-None-Match' - these make the request
#     conditional, so a matching validator yields "304 Not Modified" with an
#     empty body, which would then be written to disk as an empty image.
#   * 'sdch' in Accept-Encoding - Requests only decodes gzip/deflate
#     transparently, so only those encodings are advertised.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36',
}
def save_pic(url):
    """Download one image and write it to ``plants/<timestamp>.jpg``.

    The download is attempted up to 10 times. An attempt counts as failed
    when the request raises a ``requests`` error (connection problem,
    timeout) or the server answers with an HTTP error status, so error pages
    are never stored as images.

    Args:
        url: Address of the image to download.

    Returns:
        0 when every attempt failed and nothing was saved; None after the
        image has been written to disk.
    """
    import os

    max_attempts = 10
    response = None
    for attempt in range(1, max_attempts + 1):
        try:
            # The timeout keeps a stalled connection from hanging the script.
            response = requests.get(url, headers=headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException:
            # Only network/HTTP failures are retried; KeyboardInterrupt and
            # programming errors propagate instead of being swallowed.
            if attempt == max_attempts:
                print("request failed, skip")
                return 0
            print('request again')
        else:
            break

    out_dir = 'plants'
    if not os.path.isdir(out_dir):
        # Create the target directory on first use instead of crashing.
        os.makedirs(out_dir)

    # The current timestamp (with '.' replaced) serves as the file name.
    name = str(time.time()).replace('.', '_')
    filepath = os.path.join(out_dir, name + '.jpg')
    with open(filepath, 'wb') as out_file:
        out_file.write(response.content)
    print('save ' + name + ' pic')
# Captures the address stored in each "middleURL" field of the search page.
# The dot is escaped so that only a literal ".jpg" suffix matches, and the
# suffix is part of the capture so nothing has to be re-appended afterwards.
_MIDDLE_URL_RE = re.compile(r'middleURL":"(.+?\.jpg)"')


def _extract_image_links(page_text):
    """Return every ``middleURL`` .jpg address found in *page_text*."""
    return _MIDDLE_URL_RE.findall(page_text)


def main(word=u'\u7d2b\u7518\u84dd', pages=5):
    """Search Baidu Images and download every ``middleURL`` .jpg it lists.

    Args:
        word: Search term. The default is the Chinese term for purple
            cabbage, written with unicode escapes so the source stays ASCII
            (a raw non-ASCII literal needs a coding declaration on Python 2).
        pages: Number of result pages to request, 30 results per page.

    Returns:
        None. Each link found is printed and handed to ``save_pic``.
    """
    search_url = 'http://image.baidu.com/search/index'
    for page in range(pages):
        # Offsets run 30, 60, ... exactly as before, so results 0-29 are
        # never requested. NOTE(review): confirm that skipping the first
        # page is intentional.
        query = [
            ('tn', 'baiduimage'),
            ('word', word),
            ('pn', str(30 * (page + 1))),
            ('rn', '30'),
        ]
        try:
            # Requests percent-encodes the query, including the search word.
            response = requests.get(search_url, params=query, timeout=30)
        except requests.RequestException:
            # A failed search page should not abort the remaining pages.
            print('search request failed, skip page')
            continue
        # response.text is a text string on both Python 2 and 3, so the text
        # pattern above can be applied to it directly.
        for link in _extract_image_links(response.text):
            print(link)
            save_pic(link)
# Start the scraper only when this file is executed as a script; importing
# the module must not trigger any downloads.
if __name__ == "__main__":
    main()