如何下载img并将img路径保存到csv文件

时间:2019-07-20 10:31:15

标签: python web-scraping

我抓取了一个网站,并获得了每个商品的标题、描述、img URL和链接。我想下载img,并把img的本地保存路径连同标题、描述和链接一起导出到csv文件。

我抓取了img网址

import requests
import csv
from bs4 import BeautifulSoup 
requests.packages.urllib3.disable_warnings()
import pandas as pd
import re
# Collection page listing the gems; each product card on it links to a
# product detail page that is fetched and parsed below.
url = 'https://palagems.myshopify.com/collections/all-gems'

# Output file, appended to on every run.
CSV_PATH = 'ineryrg20195684.csv'

# CSV columns taken from the product description. Each name is also the CSS
# class of the <p> element that is looked up inside the description block.
GEM_FIELDS = [
    'gem-color',
    'gem-shape',
    'gem-measurements',
    'gem-country',
    'gem-enhancement',
    'gem-qty-of-stones',
    'gem-total-weight',
]


def _text_or_blank(node):
    """Return ``node.text``, or an empty string when ``node`` is None.

    BeautifulSoup's ``find`` returns None when nothing matches, so calling
    ``.text`` directly on its result raises AttributeError for products that
    lack one of the expected elements.
    """
    return node.text if node is not None else ''


session = requests.Session()
session.headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"}

# The collection page is fetched exactly once: there is no pagination logic
# here, so an enclosing endless loop would only repeat the same request.
content = session.get(url, verify=False).content
soup = BeautifulSoup(content, "html.parser")
posts = soup.find_all('div',{'class':'grid-item small--one-half medium--one-quarter large--one-quarter'})

# One (title, planet_data, links) tuple per product.
data = []
# The loop variable is deliberately not called `url`, so the collection URL
# defined above is never overwritten by a parsed element.
for post in posts:
    anchor = post.find('a')
    link = anchor.get('href') if anchor is not None else None
    if not link:
        # A card without a product link cannot be followed; skip it.
        continue
    links = ('https://palagems.myshopify.com'+ link)

    url_response = requests.get(links)
    url_soup = BeautifulSoup(url_response.text, 'html.parser')

    title_find = url_soup.find('div',{'class':'grid-item large--two-fifths'})
    title = _text_or_blank(title_find.find('h1')) if title_find is not None else ''

    img_find = url_soup.find('div',{'class':'lazyload__image-wrapper no-js product__image-wrapper'})
    img = img_find.find('img') if img_find is not None else None
    img_link = img.get('src') if img is not None else None
    # The src value is prefixed with the scheme, as a protocol-relative
    # URL ("//host/path") requires.
    image = ('https:'+ img_link) if img_link else ''

    print(image)
    print(title)
    print(links)

    # The description is looked up on the whole product page, so it is read
    # once per product; every field is stored as plain text.
    desciption = url_soup.find('div',{'class':'product-description rte'})
    planet_data = dict()
    for field in GEM_FIELDS:
        node = desciption.find('p',{'class':field}) if desciption is not None else None
        planet_data[field] = _text_or_blank(node)
    print(planet_data)
    data.append((title,planet_data, links ))

# newline='' lets the csv module control line endings itself (otherwise blank
# lines appear between rows on Windows); utf-8 keeps non-ASCII text intact.
with open(CSV_PATH, 'a', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    # In append mode the stream starts at the end of the file, so position 0
    # means the file is new or empty: only then is the header row written,
    # which keeps repeated runs from inserting duplicate header rows.
    if csv_file.tell() == 0:
        writer.writerow(['title'] + GEM_FIELDS + ['link'])
    for title,planet_data,links in data:
        writer.writerow([title] + [planet_data[field] for field in GEM_FIELDS] + [links])

我想下载img,并把img路径与标题、描述和链接一起导出到csv文件

0 个答案:

没有答案
相关问题