[Crawler Project] Scraping Honor of Kings Skins via the API Only, with Coroutines

Crawler Goal

Download all skin images for every hero from the official Honor of Kings site, using only its JSON and image endpoints (no HTML parsing), with gevent coroutines for concurrency.

Modules used: requests, os, gevent

Crawler Structure

Project structure

import os

import gevent
import requests


class WZSpider():
    def __init__(self):
        # Hero list API (JSON)
        self.herolist = "https://pvp.qq.com/web201605/js/herolist.json"

        # Hero detail page
        self.infor = "https://pvp.qq.com/web201605/herodetail/{}.shtml"

        # Skin image URL template
        self.img_url = "http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/{}/{}-bigskin-{}.jpg"

        # Hero list entries; each carries ename, cname and skin_name (see the sketch below)
        self.infor_urls = requests.get(url=self.herolist).json()

    def download(self, infor):
        ...

    def run(self):
        ...
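For reference, the hero list endpoint returns a JSON array of hero entries. A minimal sketch of inspecting one entry, assuming only the field names that the code in this post already relies on (ename, cname, skin_name):

# Sketch: look at the first hero entry (field names taken from the code in this post)
import requests

heroes = requests.get("https://pvp.qq.com/web201605/js/herolist.json").json()
first = heroes[0]
# ename: numeric hero id used in the skin image URL
# cname: hero name, used as the folder name
# skin_name: "|"-separated skin names
print(first.get("ename"), first.get("cname"), first.get("skin_name"))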

Download method

The download logic is factored out into its own method so it can easily be extended later, for example to run under coroutines or a thread pool (a thread-pool sketch follows the code below).

def download(self, infor):
    ename = infor.get("ename")
    cname = infor.get("cname")
    # Create one directory per hero
    path = "infor/{}".format(cname)
    os.makedirs(path, exist_ok=True)
    try:
        # Skin names are stored as a "|"-separated string
        skins = infor.get("skin_name").split("|")
    except AttributeError:
        # Entry has no skin_name field; skip it
        return
    else:
        for index, skin in enumerate(skins):
            img_url = self.img_url.format(ename, ename, index + 1)
            img = requests.get(url=img_url)
            print(cname, skin, img_url)
            with open("{}/{}.jpg".format(path, skin), "wb") as file:
                file.write(img.content)
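Since download takes a single hero entry, the scheduler can be swapped without touching it. A minimal sketch of the multithreading variant mentioned above, using concurrent.futures (not part of the original project; WZSpider is the class defined earlier):

from concurrent.futures import ThreadPoolExecutor

class WZSpiderThreaded(WZSpider):
    def run(self):
        # Same download method, driven by a thread pool instead of gevent
        with ThreadPoolExecutor(max_workers=8) as pool:
            list(pool.map(self.download, self.infor_urls))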

Invocation

def run(self):
    # One greenlet per hero entry
    glist = [gevent.spawn(self.download, infor) for infor in self.infor_urls]
    gevent.joinall(glist)
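One caveat: requests performs blocking socket IO, so the greenlets above only actually overlap if gevent's monkey patching is applied. A minimal sketch, assuming it is placed at the very top of the script, before requests is imported:

# At the very top of the script, before importing requests:
from gevent import monkey
monkey.patch_all()  # make blocking socket calls cooperative so greenlets can overlap

import requests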

Usage

if __name__ == '__main__':
    wzs = WZSpider()
    wzs.run()

