Scrapy框架使用讯代理动态转发切换IP

讯代理动态转发

# 讯代理动态转发源码 
import requests
import hashlib
import time

class IP(object):
    """Builds the Proxy-Authorization value for XunDaili dynamic forwarding.

    The service authenticates each request with an uppercase MD5 signature
    computed over the order number, the account secret and a unix timestamp.
    """

    def __init__(self, orderno, secret):
        self.orderno = orderno
        self.secret = secret

    def Headers(self):
        """Return the 'sign=...&orderno=...&timestamp=...' auth string."""
        timestamp = str(int(time.time()))  # whole-second unix timestamp
        # Sign the orderno/secret/timestamp triple with an uppercase MD5 digest.
        plain_text = f'orderno={self.orderno},secret={self.secret},timestamp={timestamp}'
        sign = hashlib.md5(plain_text.encode()).hexdigest().upper()
        # Assemble the header value in the order the proxy expects.
        return f'sign={sign}&orderno={self.orderno}&timestamp={timestamp}'

if __name__ == '__main__':

    # NOTE: adjust the scheme key ('http'/'https') to match the target site.
    client = IP('订单号', 'secret')
    proxies = {'http': 'http://forward.xdaili.cn:80'}
    auth_headers = {'Proxy-Authorization': client.Headers()}
    print(auth_headers)
    # Smoke test: httpbin echoes back the IP the request arrived from.
    result = requests.get(url="http://httpbin.org/ip", headers=auth_headers, proxies=proxies).json()
    print(result)

随机IP

开启下载中间件

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# Register the proxy middleware; 543 is the conventional sample priority from
# Scrapy's settings template — tune it if it must run before/after other
# downloader middlewares.
DOWNLOADER_MIDDLEWARES = {
   'IpApp.middlewares.IpappDownloaderMiddleware': 543,
}

Spider 源码

class IpspiderSpider(scrapy.Spider):
    """Spider that repeatedly fetches httpbin.org/ip to observe the outgoing IP."""

    name = 'ipSpider'
    allowed_domains = ['httpbin.org']
    start_urls = ['http://httpbin.org/ip']

    def parse(self, response):
        # httpbin returns a JSON body containing the origin IP of the request.
        print(response.text)
        # Re-request the same URL; dont_filter=True bypasses the duplicate
        # filter, so this loops indefinitely to show the IP rotating.
        # NOTE(review): `Request` needs an import (e.g. `from scrapy import
        # Request`) — not visible in this snippet; confirm it exists.
        yield Request(url=self.start_urls[0], dont_filter=True)

设置随机IP

在 middlewares.py 里编辑 DownloaderMiddleware() 类

from IpApp.xundaili import IP 

class IpappDownloaderMiddleware(object):
    """Downloader middleware that attaches XunDaili dynamic-forwarding proxy
    credentials to every outgoing request."""

    def __init__(self):
        # Built once instead of per request: orderno/secret never change.
        # NOTE(review): replace the placeholders with real credentials,
        # ideally read from Scrapy settings rather than hard-coded here.
        self._ip = IP('订单号', 'secret')

    def process_request(self, request, spider):
        # Headers() must be recomputed per request because the signature
        # embeds the current timestamp.
        request.headers['Proxy-Authorization'] = self._ip.Headers()
        request.meta['proxy'] = 'http://forward.xdaili.cn:80'
        # Returning None lets Scrapy continue processing this request.
        return None

测试结果

mark

小错误提示

当使用认证类代理时，出现找不到 Proxy-Authorization 的头部信息

原因:scrapy 会自动去掉 Proxy-Authorization
解决:
进入 scrapy 的源码
路径:scrapy\core\downloader\handlers\http11.py

注释掉:
# if isinstance(agent, self._TunnelingAgent):
#     headers.removeHeader(b'Proxy-Authorization')

或者 使用框架提供的 下载中间件

mark


关于下载中间件的了解

发表评论 / Comment

用心评论~