-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathscrapy初级5:用settings和middlewares设置ua请求头和proxy代理.py
62 lines (42 loc) · 1.68 KB
/
scrapy初级5:用settings和middlewares设置ua请求头和proxy代理.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Here the User-Agent is set only through settings.py; it could also be set
# directly in the spider, and in real projects fake_useragent is handy for
# rotating UAs. Configuring a proxy is the more involved part.
===========================================================================
#hupu.py
import scrapy
class MySpider(scrapy.Spider):
    """Minimal spider that fetches httpbin.org/get and dumps the response.

    httpbin echoes the request back, so the printed body reveals which
    User-Agent header and origin IP actually reached the server — i.e.
    whether the settings.py headers and the proxy middleware took effect.
    """
    name = 'Hupu'
    start_urls = ['http://httpbin.org/get',]

    def parse(self, response):
        # Dump URL and raw body; the body shows the effective request headers.
        print('*'*33, response.url, response.body)

    def start_requests(self):
        # Issue a request for EVERY configured start URL.  The original code
        # only yielded start_urls[0], silently ignoring any further entries,
        # and its comment claimed a cookie was attached when none was.
        for url in self.start_urls:
            yield scrapy.Request(url, callback=self.parse)
===========================================================================
#settings.py
# -*- coding: utf-8 -*-
BOT_NAME = 'Hupu'

SPIDER_MODULES = ['Hupu.spiders']
NEWSPIDER_MODULE = 'Hupu.spiders'

# httpbin is fine to hit either way; for real sites, check robots.txt before
# disabling this.
ROBOTSTXT_OBEY = False

DEFAULT_REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en',
    # BUG FIX: the key used to be 'USER-AGENT ' (trailing space, wrong casing),
    # so Scrapy sent a bogus extra header and the real User-Agent stayed the
    # Scrapy default.  Use the canonical header name with a realistic browser
    # value; note the USER_AGENT setting is the more idiomatic place for this.
    'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/91.0.4472.124 Safari/537.36'),
}

# Proxy pool consumed by Hupu.middlewares.MyproxiesMiddleware; entries are
# plain "host:port" strings (free proxies — expect some to be dead).
IPS = ["118.190.95.26:9001", "183.48.91.107:8118"]

DOWNLOADER_MIDDLEWARES = {
    #'scrapy.downloadermiddleware.useragent.UserAgentMiddleware': None,
    'Hupu.middlewares.MyproxiesMiddleware': 400,
}
===========================================================================
#middlewares.py
from scrapy import signals
import random
from .settings import IPS
class MyproxiesMiddleware(object):
    """Downloader middleware that routes each request through a random proxy.

    Proxies come from the module-level IPS list (imported from settings.py);
    a fresh entry is drawn per request, so retries may leave through a
    different proxy than the first attempt.
    """

    def __init__(self, ip=''):
        # `ip` is never read by process_request; kept for compatibility.
        self.ip = ip

    def process_request(self, request, spider):
        chosen = random.choice(IPS)
        print("this is ip:" + chosen)
        # Scrapy's built-in HttpProxyMiddleware honours request.meta["proxy"].
        request.meta["proxy"] = "http://" + chosen