爬虫目标
通过抖音的分享链接,获取抖音的原视频(无水印)
使用模块:requests、re
爬虫结构
下面的类结构主要是为了逐步拿到最后获取视频信息时所需要的参数
class DYSpider:
    """Skeleton of a spider that works from a Douyin share link.

    The constructor only stores the share link, the two endpoints and the
    request headers. The step methods and ``run`` are empty placeholders.
    """

    def __init__(self, share_url):
        """Store the share link and initialise all request state.

        Args:
            share_url: The share link to work from.
        """
        self.share_url = share_url

        # Three parameters obtained with a GET request: item_ids, mid, u_code.
        self.item_ids = self.mid = self.u_code = ""

        # GET endpoint used to obtain ``dytk``; the ``{}`` takes item_ids.
        self.dytk_url = "https://www.iesdouyin.com/share/video/{}/"
        self.dytk = ""

        # GET endpoint that returns the item information.
        self.infor_url = "https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/"

        self.headers = {
            "user-agent": (
                "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) "
                "AppleWebKit/604.1.38 (KHTML, like Gecko) "
                "Version/11.0 Mobile/15A372 Safari/604.1"
            ),
        }

    def get_imu(self):
        """Placeholder for the step that obtains item_ids, mid and u_code."""

    def get_dytk(self):
        """Placeholder for the step that obtains dytk."""

    def get_infor(self):
        """Placeholder for the step that requests the item information."""

    def run(self):
        """Placeholder for the method that runs the steps."""
get_imu()
获取 item_ids、mid、u_code 三个参数
def get_imu(self):
    """Request the share link and extract item_ids, mid and u_code.

    The three values are read from the URL of the response to
    ``self.share_url`` and stored on ``self.item_ids``, ``self.mid`` and
    ``self.u_code``. Nothing is stored unless all three are found.

    Raises:
        ValueError: If any of the three parameters is missing from the URL.
    """
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(url=self.share_url, timeout=10)
    final_url = response.url

    # Each value ends at the next delimiter, so a query parameter is found
    # even when it is the last one in the URL (no trailing "&" required).
    patterns = {
        "item_ids": r"video/([^/?#]+)",
        "mid": r"[?&]mid=([^&#]+)",
        "u_code": r"[?&]u_code=([^&#]+)",
    }

    found = {}
    for name, pattern in patterns.items():
        match = re.search(pattern, final_url)
        if match is None:
            raise ValueError(
                "could not find {!r} in URL: {}".format(name, final_url)
            )
        found[name] = match.group(1)

    self.item_ids = found["item_ids"]
    self.mid = found["mid"]
    self.u_code = found["u_code"]
get_dytk()
为了获取 dytk 参数
def get_dytk(self):
    """Request the share page for ``self.item_ids`` and store its dytk value.

    The page text is searched for ``dytk: "<value>" });`` and the captured
    value is stored on ``self.dytk`` as a string. When the page does not
    contain that pattern, ``self.dytk`` is set to ``None``.
    """
    url = self.dytk_url.format(self.item_ids)
    query = {
        "region": "CN",
        "mid": self.mid,
        "u_code": self.u_code,
        "titleType": "title",
        "utm_source": "copy_link",
        "utm_campaign": "client_share",
        "utm_medium": "android",
        "app": "aweme",
    }
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(
        url=url, headers=self.headers, params=query, timeout=10
    )

    match = re.search(r'dytk: "(.+?)" }\);', response.text)
    # Store the captured text, not the Match object, so that the value can be
    # sent as a request parameter later.
    self.dytk = match.group(1) if match else None
get_infor()
这里没有对返回的 JSON 做进一步处理,只是直接打印出来
def get_infor(self):
    """Request the item information, print it and return it.

    Sends ``self.item_ids`` and ``self.dytk`` as query parameters to
    ``self.infor_url`` with ``self.headers``.

    Returns:
        The decoded JSON body of the response. It is also printed, as before.
    """
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(
        url=self.infor_url,
        headers=self.headers,
        params={
            "item_ids": self.item_ids,
            "dytk": self.dytk,
        },
        timeout=10,
    )
    data = response.json()
    print(data)
    # Returned as well as printed so that callers can use the result.
    return data
运行方法
def run(self):
    """Run the three steps in order: get_imu, get_dytk, then get_infor."""
    # Each method is looked up right before it is called, one after another.
    for step_name in ("get_imu", "get_dytk", "get_infor"):
        getattr(self, step_name)()
使用
# Example usage: build a spider for one share link and run all of its steps.
dys = DYSpider(share_url="https://v.douyin.com/Wf6Rsa/")
dys.run()
版权声明:《 【爬虫项目】某音无水印下载 》为明妃原创文章,转载请注明出处!
最后编辑:2020-3-17 15:03:29