博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
JD 评论晒图爬虫
阅读量:7005 次
发布时间:2019-06-27

本文共 1988 字,大约阅读时间需要 6 分钟。

JD 评论晒图爬虫

#coding=utf-8import requestsimport reimport os__author__ = 'depy'"""jd 评论晒图爬虫@productId 商品id@startpage 开始页数@endpage 结束页数"""class JDPIC(object):    def __init__(self,productId,startpage,endpage=20):        self.headers = {            'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',            'Accept':'*/*',            'Accept-Encoding':'gzip, deflate, sdch, br',            'Accept-Language':'zh-CN,zh;q=0.8',            'Cookie':''        }        self.url = 'https://club.jd.com/discussion/getProductPageImageCommentList.action'        self.startpage = startpage        self.productId = productId        self.endpage = endpage    def sendReq(self,page):        params = {            'productId':self.productId,            'isShadowSku':'0',            'callback':'jQuery219465',            'page':page,            'pageSize':20        }        r = requests.get(self.url,params=params,headers=self.headers,timeout=10)        regex = re.findall(r'"imageUrl":"//(.*?)"',r.text)        return regex    def downloadImageFile(self,imgUrl):        local_filename = imgUrl.split('/')[-1]        print "Download Image File=", local_filename        imgUrl = 'http://'+imgUrl        r = requests.get(imgUrl, headers =self.headers,stream=True, timeout=20)        dirName = 'JDPIC1'        if not os.path.exists(dirName):            os.makedirs(dirName)        with open(dirName+'/'+local_filename, 'wb') as f:            for chunk in r.iter_content(chunk_size=1024):                if chunk:                    f.write(chunk)                    f.flush()            f.close()if __name__ == '__main__':    J = JDPIC(1111,51,100)  #商品id自行修改    #print J.endpage    list = range(int(J.startpage),int(J.endpage)+1)    for i in list:        regexlist = J.sendReq(i)        for picurl in regexlist:            J.downloadImageFile(picurl)    print "downpic success"

 

转载于:https://www.cnblogs.com/depycode/p/6933960.html

你可能感兴趣的文章