我抓取了一个网站,并获得了每个商品的标题、描述、图片 URL 和链接。我想把图片下载到本地,并导出一个 CSV 文件,其中包含标题、描述、链接以及图片的本地保存路径。
目前我已经能抓取到图片的网址,代码如下:
import csv
import re
from pathlib import Path, PurePosixPath
from urllib.parse import urljoin, urlsplit

import pandas as pd
import requests
from bs4 import BeautifulSoup

requests.packages.urllib3.disable_warnings()
url = 'https://palagems.myshopify.com/collections/all-gems'
while True:
session = requests.Session()
session.headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}
content = session.get(url, verify=False).content
soup = BeautifulSoup(content, "html.parser")
posts = soup.find_all('div',{'class':'grid-item small--one-half medium--one-quarter large--one-quarter'})
npo_jobs = {}
items = []
data = []
for url in posts:
#price = url.find('span').text
link = url.find('a').get('href')
links = ('https://palagems.myshopify.com'+ link)
url_response = requests.get(links)
url_data = url_response.text
url_soup = BeautifulSoup(url_data, 'html.parser')
title_find = url_soup.find('div',{'class':'grid-item large--two-fifths'})
title = title_find.find('h1').text
img_find = url_soup.find('div',{'class':'lazyload__image-wrapper no-js product__image-wrapper'})
img = img_find.find('img')
img_link = img.get('src')
image = ('https:'+ img_link)
print(image)
print(title)
print(links)
for tr in url_soup.find_all('tr'):
desciption = url_soup.find('div',{'class':'product-description rte'})
planet_data = dict()
planet_data['gem-color'] = desciption.find('p',{'class':'gem-color'}).text
planet_data['gem-shape'] = desciption.find('p',{'class':'gem-shape'}).text
planet_data['gem-measurements'] = desciption.find('p',{'class':'gem-measurements'}).text
planet_data['gem-country'] = desciption.find('p',{'class':'gem-country'})
planet_data['gem-enhancement'] = desciption.find('p',{'class':'gem-enhancement'})
planet_data['gem-qty-of-stones'] = desciption.find('p',{'class':'gem-qty-of-stones'}).text
planet_data['gem-total-weight'] = desciption.find('p',{'class':'gem-total-weight'}).text
print(planet_data)
data.append((title,planet_data, links ))
with open('ineryrg20195684.csv', 'a') as csv_file:
writer = csv.writer(csv_file)
writer.writerow(['title','gem-color','gem-shape','gem-measurements','gem-country','gem-enhancement','gem-qty-of-stones','gem-total-weight','link'])
#The for loop
for title,planet_data,links in data:
writer.writerow([title,planet_data['gem-color'],planet_data['gem-shape'],planet_data['gem-measurements'],planet_data['gem-country'],planet_data['gem-enhancement'],planet_data['gem-qty-of-stones'],planet_data['gem-total-weight'],links])
我想下载图片,并导出一个包含标题、描述、链接和图片本地路径的 CSV 文件。