讯代理动态转发
# 讯代理 (xdaili) dynamic-forwarding proxy — authentication helper source.
import hashlib
import time


class IP(object):
    """Build the Proxy-Authorization header value required by the
    xdaili dynamic-forwarding proxy (forward.xdaili.cn)."""

    def __init__(self, orderno, secret):
        # Order number and API secret issued by the proxy provider.
        self.orderno = orderno
        self.secret = secret

    def Headers(self):
        """Return the signed auth string 'sign=<MD5>&orderno=<no>&timestamp=<ts>'.

        The signature is MD5 of 'orderno=...,secret=...,timestamp=...'
        (comma-joined), hex-encoded and upper-cased, per the xdaili API.
        """
        timestamp = str(int(time.time()))  # current Unix time in seconds
        # Plaintext to sign: order number, secret and timestamp, comma-joined.
        plain_text = ('orderno=' + self.orderno + ',' +
                      'secret=' + self.secret + ',' +
                      'timestamp=' + timestamp)
        # MD5 needs bytes; the API requires the hex digest in upper case.
        sign = hashlib.md5(plain_text.encode()).hexdigest().upper()
        # Final header value: signature, order number and timestamp, '&'-joined.
        auth = ('sign=' + sign + '&' +
                'orderno=' + self.orderno + '&' +
                'timestamp=' + timestamp)
        return auth


if __name__ == '__main__':
    # 注意不同的网站,修改不同的 http —
    # demo: route one request through the proxy; adjust 'http'/'https' per site.
    # Imported lazily so the signing helper works without requests installed.
    import requests

    ip = IP('订单号', 'secret')
    proxy = {'http': 'http://forward.xdaili.cn:80'}
    headers = {'Proxy-Authorization': ip.Headers()}
    print(headers)
    r = requests.get(url="http://httpbin.org/ip", headers=headers, proxies=proxy).json()
    print(r)
随机IP
开启下载中间件
# Enable downloader middlewares for this project.
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    # Priority 543 slots the proxy middleware among Scrapy's defaults.
    'IpApp.middlewares.IpappDownloaderMiddleware': 543,
}
Spider 源码
class IpspiderSpider(scrapy.Spider):
    """Spider that fetches httpbin.org/ip in a loop, printing the outbound
    IP each time — useful for verifying that proxy rotation is working."""

    name = 'ipSpider'
    allowed_domains = ['httpbin.org']
    start_urls = ['http://httpbin.org/ip']

    def parse(self, response):
        # Show the IP reported by httpbin for this request.
        print(response.text)
        # Re-queue the same URL; dont_filter bypasses the duplicate-request
        # filter so the spider keeps looping.
        yield Request(url=self.start_urls[0], dont_filter=True)
设置随机IP
在 middlewares.py 里编辑 DownloaderMiddleware() 类
from IpApp.xundaili import IP


class IpappDownloaderMiddleware(object):
    """Downloader middleware that routes every request through the xdaili
    forwarding proxy, attaching a freshly signed Proxy-Authorization header."""

    def process_request(self, request, spider):
        # Sign with the order number / secret issued by the proxy provider.
        auth = IP('订单号', 'secret').Headers()
        request.headers['Proxy-Authorization'] = auth
        request.meta['proxy'] = 'http://forward.xdaili.cn:80'
        # Returning None tells Scrapy to continue processing this request.
        return None
测试结果
小错误提示
当使用认证类代理时,出现找不到
Proxy-Authorization
的头部信息
原因:scrapy 会自动去掉 Proxy-Authorization
解决:
进入 scrapy 的源码
路径:scrapy\core\downloader\handlers\http11.py
注释掉:
# if isinstance(agent, self._TunnelingAgent):
# headers.removeHeader(b'Proxy-Authorization')
或者 使用框架提供的 下载中间件
版权声明:《 Scrapy框架使用讯代理动态转发切换IP 》为明妃原创文章,转载请注明出处!
最后编辑:2019-12-9 09:12:04