【爬虫项目】某音无水印下载

爬虫目标

通过抖音的分享链接,获取抖音的原视频(无水印)

mark

使用模块:requests、re

爬虫结构

主要为了最后获取视频时所要的参数

class DYSpider:
    """Skeleton of a spider that turns a Douyin share link into item info.

    The instance keeps the share link, the request parameters collected
    along the way (``item_ids``, ``mid``, ``u_code``, ``dytk``), the two
    endpoint URLs and the request headers.
    """

    def __init__(self, share_url):
        """Store the share link and initialise every parameter to ``""``.

        Args:
            share_url: The Douyin share link to resolve.
        """
        # A GET on the share link yields three parameters: item_ids, mid, u_code.
        self.share_url = share_url
        self.item_ids = ""
        self.mid = ""
        self.u_code = ""

        # Page fetched with GET to obtain dytk; the "{}" placeholder takes item_ids.
        self.dytk_url = "https://www.iesdouyin.com/share/video/{}/"
        self.dytk = ""

        # Item-info API endpoint (GET).
        self.infor_url = "https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/"

        self.headers = {
            "user-agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1",
        }

    # The stubs below live at class level (not nested inside __init__) so
    # that they are real methods reachable from an instance.

    def get_imu(self):
        """Obtain item_ids, mid and u_code (stub in this skeleton)."""

    def get_dytk(self):
        """Obtain the dytk parameter (stub in this skeleton)."""

    def get_infor(self):
        """Query the item-info endpoint (stub in this skeleton)."""

    def run(self):
        """Run the spider (stub in this skeleton)."""

get_imu()

获取 item_ids mid u_code三个参数

def get_imu(self):
    """Resolve the share link and extract item_ids, mid and u_code.

    Sends a GET to ``self.share_url`` and parses the URL of the final
    response (``r.url``), storing the three values on the instance.

    Raises:
        ValueError: If any of the three parameters is missing from the
            final URL.
    """
    r = requests.get(url=self.share_url)
    final_url = r.url

    def extract(pattern, label):
        # Fail with a readable message instead of the opaque
        # "'NoneType' object has no attribute 'group'".
        found = re.search(pattern, final_url)
        if found is None:
            raise ValueError(
                "{} not found in resolved URL: {}".format(label, final_url)
            )
        return found.group(1)

    self.item_ids = extract(r'video/(.+?)/', "item_ids")
    # Query values end at the next "&", at "#", or at the end of the URL,
    # so a parameter that happens to come last is still captured.
    self.mid = extract(r'[?&]mid=([^&#]+)', "mid")
    self.u_code = extract(r'[?&]u_code=([^&#]+)', "u_code")

get_dytk()

为了获取 dytk 参数

def get_dytk(self):
    """Fetch the video share page and store its ``dytk`` value.

    Sends a GET to ``self.dytk_url`` formatted with ``self.item_ids`` and
    searches the response text for ``dytk: "<value>" });``. The captured
    value is stored in ``self.dytk``; when the page contains no such
    fragment, ``self.dytk`` is set to ``""``.
    """
    url = self.dytk_url.format(self.item_ids)
    params = {
        "region": "CN",
        "mid": self.mid,
        "u_code": self.u_code,
        "titleType": "title",
        "utm_source": "copy_link",
        "utm_campaign": "client_share",
        "utm_medium": "android",
        "app": "aweme",
    }
    r = requests.get(url=url, headers=self.headers, params=params)
    found = re.search(r'dytk: "(.+?)" }\);', r.text)
    # Keep the captured string rather than the Match object itself, so the
    # value can be sent as a query parameter later on.
    self.dytk = found.group(1) if found else ""

get_infor()

没有对返回的json进行处理

def get_infor(self):
    """Query the item-info endpoint and print the decoded JSON reply.

    The request is a GET to ``self.infor_url`` carrying ``item_ids`` and
    ``dytk`` as query parameters; the JSON body is printed unprocessed.
    """
    query = {
        "item_ids": self.item_ids,
        "dytk": self.dytk,
    }
    response = requests.get(url=self.infor_url, headers=self.headers, params=query)
    payload = response.json()
    print(payload)

运行方法

def run(self):
    """Run the three stages in order: get_imu, get_dytk, get_infor."""
    # Each stage is looked up only when its turn comes, one after another.
    for stage in ("get_imu", "get_dytk", "get_infor"):
        getattr(self, stage)()

使用

# Create a spider for one share link, then call run() to start it.
dys = DYSpider("https://v.douyin.com/Wf6Rsa/")
dys.run()
发表评论 / Comment

用心评论~