当前位置:首页 > 资讯 > 正文

爬取百度图片

爬取百度图片

# Crawling Baidu image search with Python.
#
# A typical approach is to send the HTTP request with the `requests` library
# and then parse the returned HTML with a parser such as BeautifulSoup to
# extract the image links. The short example below uses `beautifulsoup4` to
# collect image links from a search-result page and download them.
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Shared by the search request and every image download. It lives at module
# level so both functions can see it (it used to be a local of the search
# function, which made the download function fail with NameError).
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

SEARCH_URL = 'http://image.baidu.com/search/index'

# Seconds to wait on any single HTTP request before giving up; without a
# timeout `requests` can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def get_baidu_image_links(keyword):
    """Return the image links found on the Baidu image search page.

    Args:
        keyword: Search term. It is passed through ``params`` so that
            non-ASCII text and reserved characters are percent-encoded
            correctly instead of being pasted raw into the URL.

    Returns:
        A list of absolute URLs, one per matching anchor that has an
        ``href`` attribute. The list is empty when nothing matches.

    Raises:
        requests.RequestException: If the search request fails, times out,
            or returns an HTTP error status.
    """
    response = requests.get(
        SEARCH_URL,
        params={'tn': 'baiduimage', 'ie': 'utf-8', 'word': keyword},
        headers=HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')

    # Image links are expected in <a> elements with class "i-bdimg".
    # NOTE(review): this selector comes from the original example; confirm it
    # against the live page, which may build its results with JavaScript.
    # href=True skips anchors without an href instead of raising KeyError.
    img_tags = soup.find_all('a', class_='i-bdimg', href=True)

    # Resolve relative links against the final page URL so that every entry
    # can be handed straight to requests.get().
    return [urljoin(response.url, img['href']) for img in img_tags]


def download_images(image_urls, save_dir='images', limit=10):
    """Download images and save them as ``image_<n>.jpg`` in ``save_dir``.

    Args:
        image_urls: Sliceable sequence of image URLs.
        save_dir: Target directory; created if it does not exist.
        limit: Maximum number of URLs (taken from the front of
            ``image_urls``) to download. ``None`` downloads all of them.

    Returns:
        A list with the paths of the files that were written. A URL that
        cannot be fetched is reported on stdout and skipped; its number is
        not reused, so file names always match the URL's position.
    """
    os.makedirs(save_dir, exist_ok=True)

    saved_paths = []
    for index, link in enumerate(image_urls[:limit], start=1):
        try:
            img_response = requests.get(
                link, headers=HEADERS, timeout=REQUEST_TIMEOUT
            )
            # Do not write an HTML error page to disk as if it were an image.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Skipping {link}: {exc}')
            continue

        path = os.path.join(save_dir, f'image_{index}.jpg')
        with open(path, 'wb') as f:
            f.write(img_response.content)
        saved_paths.append(path)

    return saved_paths


# Usage example
if __name__ == '__main__':
    keyword = '猫'
    image_links = get_baidu_image_links(keyword)
    download_images(image_links)