Using Crawler Code to Download Photo Sets and Save Them as PDFs
Published: 2025-11-04 20:51:54 · Author: 玩站小弟
This crawler does not crawl site-wide links. It crawls the link of one photo set at a time, which is effectively the detail-page URL. Python libraries you need to install separately: requests, fake_useragent, lxml (for XPath), and PIL (Pillow). The screenshots below are pixelated to pass review; the site really is a legitimate portrait-photography site, not some sketchy one.

import requests
from fake_useragent import UserAgent
from lxml import etree
import os
import aiohttp
import asyncio
from PIL import Image
import shutil

# Fetch every image of a detail page
async def get_xq(url):
    # image counter
    p = 0
    tasks = []
    res = requests.get(url=url, headers=header)
    res.encoding = "utf-8"
    res.close()
    tree = etree.HTML(res.text)
    # title of the set, kept globally as the output file name
    imgtitle = tree.xpath("//title/text()")[0]
    global title
    title = imgtitle
    # image links on the first page
    img = tree.xpath("//article//img/@src")
    imgurl = ["https://www.xgyw01.co" + i for i in img]
    print("Title of the set being downloaded:\n" + imgtitle)
    for i in imgurl:
        p += 1
        tasks.append(down(i, imgtitle, p))
    # URL and text of the last pagination link; "下一页"/"下页" is the site's "next page" label
    next_url = "https://www.xgyw01.co" + tree.xpath('//div[@class="pagination"][1]//a[last()]/@href')[0]
    last_text = tree.xpath('//div[@class="pagination"][1]//a[last()]/text()')[0]
    # follow the pagination through all remaining pages
    while True:
        print("Fetching " + next_url)
        if last_text == "下一页" or last_text == "下页":
            res = requests.get(url=next_url, headers=header)
            res.encoding = "utf-8"
            res.close()
            tree = etree.HTML(res.text)
            img = tree.xpath("//article//img/@src")
            imgurl = ["https://www.xgyw01.co" + i for i in img]
            for i in imgurl:
                p += 1
                tasks.append(down(i, imgtitle, p))
            # re-check the loop condition on the new page
            last_text = tree.xpath('//div[@class="pagination"][1]//a[last()]/text()')[0]
            if last_text == "下一页" or last_text == "下页":
                next_url = "https://www.xgyw01.co" + tree.xpath('//div[@class="pagination"][1]//a[last()]/@href')[0]
                continue
            else:
                break
        else:
            break
    print("Starting async image downloads .....")
    # asyncio.wait() no longer accepts bare coroutines, so gather them instead
    await asyncio.gather(*tasks)

# Download a single image
async def down(url, filename, fn):
    filepath = os.path.join(dirdz, filename)
    # exist_ok avoids a race when several tasks create the folder at once
    os.makedirs(filepath, exist_ok=True)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            with open(filepath + "/" + str(fn) + ".jpg", mode="wb") as f:
                f.write(await res.content.read())
            print(f"Image {fn} downloaded")

# Convert the downloaded set of JPEGs to a PDF
def jpg_pdf(filename):
    # folder of the set and name of the resulting PDF
    folder_path = os.path.join(dirdz, filename)
    pdf_filename = os.path.join(dirdz, filename + ".pdf")
    # collect the JPEG files, sorted numerically so the pages keep their order
    image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                   if f.endswith(".jpeg") or f.endswith(".jpg")]
    image_files.sort(key=lambda f: int(os.path.splitext(os.path.basename(f))[0]))
    # open each file as a Pillow Image
    image_list = []
    for file_path in image_files:
        img = Image.open(os.path.abspath(file_path))
        image_list.append(img)
    # write the first image as a PDF and append the rest as extra pages
    image_list[0].save(pdf_filename, "PDF", resolution=100.0, save_all=True, append_images=image_list[1:])
    print("PDF merged, deleting the cached images")
    shutil.rmtree(folder_path)
    print("Cleanup done")

# Entry point
if __name__ == "__main__":
    # shared request header with a randomized User-Agent
    header = {"user-agent": UserAgent().random}
    # folder the sets are saved into
    dirdz = "enter your save folder here"
    # global title, used as the output file name
    title = ""
    # The site being crawled is https://www.xgyw01.co/ -- open the set you want and copy its detail-page URL
    url = input("Enter the detail-page URL of the set: ")
    try:
        # get_xq is a coroutine, so it has to be started with asyncio.run
        asyncio.run(get_xq(url))
        print(title)
    except Exception:
        print("Failed to parse the detail page .........")
    print("All downloads finished, generating the PDF .......")
    # convert the images to a single PDF
    jpg_pdf(title)
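The trickiest part of get_xq is the pagination walk: it keeps following the last link in the pagination bar as long as that link still reads 下一页/下页 ("next page"). Below is a minimal sketch of the same walk refactored as a generator, assuming the same DOM structure as the original; iter_pages and BASE are names introduced here, not part of the script.

import requests
from lxml import etree

BASE = "https://www.xgyw01.co"

def iter_pages(start_url, headers):
    # yield the parsed tree of every page in the set, following
    # the last pagination link while it still reads 下一页/下页
    url = start_url
    while True:
        res = requests.get(url, headers=headers)
        res.encoding = "utf-8"
        tree = etree.HTML(res.text)
        yield tree
        last = tree.xpath('//div[@class="pagination"][1]//a[last()]')
        if not last or last[0].text not in ("下一页", "下页"):
            break
        url = BASE + last[0].get("href")

With this shape, get_xq could simply loop "for tree in iter_pages(url, header):" and collect image URLs from each tree, which removes the duplicated XPath lookups and the nested break logic.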

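The downloads themselves are plain aiohttp plus asyncio.gather, with one ClientSession opened per image. If the site throttles bursts of requests, a common variant is to share a single session and cap concurrency with a semaphore. This is only a sketch of that idea, not part of the original script; fetch_all, the limit of 10, and the file-naming scheme are assumptions.

import asyncio
import aiohttp

async def fetch_all(urls, out_dir=".", limit=10):
    # cap concurrent requests so the site is not hammered (limit is an assumption)
    sem = asyncio.Semaphore(limit)

    async def fetch_one(session, url, n):
        async with sem:
            async with session.get(url) as res:
                data = await res.read()
        # number the files the same way the original script does
        with open(f"{out_dir}/{n}.jpg", "wb") as f:
            f.write(data)

    # one shared session instead of one per image
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(*(fetch_one(session, u, i + 1) for i, u in enumerate(urls)))

# example call: asyncio.run(fetch_all(["https://example.com/a.jpg"], out_dir="pics"))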
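Finally, the PDF step hinges on one Pillow feature: Image.save can write a multi-page PDF when save_all=True and the remaining pages are passed through append_images. Stripped of the script's paths, the pattern looks like the sketch below; images_to_pdf and the example paths are placeholders.

from PIL import Image
import os

def images_to_pdf(folder, pdf_path):
    # JPEGs sorted numerically so "10.jpg" lands after "9.jpg"
    files = sorted(
        (f for f in os.listdir(folder) if f.endswith((".jpg", ".jpeg"))),
        key=lambda f: int(os.path.splitext(f)[0]),
    )
    # PDF pages must be RGB; JPEGs usually are, but convert() makes it explicit
    pages = [Image.open(os.path.join(folder, f)).convert("RGB") for f in files]
    # the first image starts the PDF, the rest are appended as extra pages
    pages[0].save(pdf_path, "PDF", save_all=True, append_images=pages[1:])

images_to_pdf("./photoset", "photoset.pdf")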
