Python:对失效网址发起 URL 请求时出错

时间:2018-07-14 17:51:53

标签: python httprequest urlrequest

我最近开始为自己正在做的项目编写 Python 代码。我写了一个脚本,它从 txt 文件中读取图片的 URL 列表并把它们全部下载下来。但是,列表中的部分 URL 已经很旧,不再有效,请求它们会导致错误。另外,如果某个链接需要很长时间才能加载,也会导致错误。

代码:

import urllib.request

import random


def downloadImageFromURL(url):
    """Download the resource at ``url`` to a randomly numbered ``.jpg`` file.

    The file name is ``<number>.jpg`` with the number drawn from
    ``random.randrange(1, 10000)``; the name is relative, so the file is
    created in the current working directory. Nothing is returned.
    """
    file_stem = random.randrange(1, 10000)
    urllib.request.urlretrieve(url, "{}.jpg".format(file_stem))


# Try to download the first ten entries of url.txt, echoing each line afterwards.
f = open('url.txt','r')

for row in range(0, 10):
    # readline() keeps the trailing newline and returns '' once the file is exhausted.
    line = f.readline()

    # NOTE(review): urllib presumably reports most failures as
    # urllib.error.URLError / HTTPError, which are OSError subclasses but not
    # ConnectionError subclasses, so this handler will not catch them - confirm.
    try:
        downloadImageFromURL(line)
    except ConnectionError:
        print("Failed to open url.")

    print(line)

f.close()

新代码:

import urllib.request
import random

def sendRequest(url):
    """Issue a streamed GET request for ``url`` with a 5-second timeout.

    Returns the ``requests`` response object when the status code is 200.
    Returns ``False`` when the request raises any exception or the server
    answers with any other status code.
    """
    try:
        page = requests.get(url, stream=True, timeout=5)
    except Exception:
        # Deliberately broad: every failure to fetch is reported as False.
        return False

    if page.status_code == 200:
        return page

    # A streamed response holds its connection until the body is consumed or
    # the response is closed; the caller never receives this one, so release
    # it here instead of leaking it.
    page.close()
    return False

# Request the first ten URLs listed in url.txt and report the ones that fail.
with open('url.txt', 'r') as f:  # closed even if the loop raises
    for row in range(0, 10):
        line = f.readline()
        if not line:
            # readline() returns '' only at end of file: fewer than ten lines.
            break

        line = line.strip()  # drop the trailing newline before using it as a URL
        if not line:
            continue  # skip blank lines

        # sendRequest() signals failure by returning False rather than raising,
        # so the result is checked instead of catching ConnectionError.
        page = sendRequest(line)
        if page is False:
            print("Failed to open url.")
        else:
            # Only reachability is checked here; release the streamed connection.
            page.close()
        print(line)

谢谢!

1 个答案:

答案 0 :(得分:1)

import os
import requests
import shutil

# Folder that downloaded files are written into (joined with the file name below).
outputDirectory = r"C:\Users\Joshua\Documents\Downloaded Media"

def sendRequest(url):
    """Open a streamed GET request to ``url``, waiting at most 5 seconds.

    Returns the ``requests`` response object if the server replies with
    status 200; returns ``False`` if the request raises any exception or the
    status code is anything else.
    """
    try:
        page = requests.get(url, stream=True, timeout=5)
    except Exception:
        # Best-effort by design: any error while fetching means "no page".
        return False

    if page.status_code != 200:
        # With stream=True the connection stays checked out until the body is
        # read or the response is closed. This response is discarded, so
        # close it to hand the connection back.
        page.close()
        return False

    return page

def downloadImage(imageUrl: str, filePath: str) -> bool:
    """Download ``imageUrl`` and write its body to ``filePath``.

    Returns ``True`` when the body was copied to the file, and ``False`` when
    the request failed (``sendRequest`` returned ``False``) or copying the
    body raised an exception. Errors from opening ``filePath`` propagate to
    the caller.
    """
    img = sendRequest(imageUrl)

    # sendRequest() returns the literal False on failure; test identity so a
    # response object is never compared by value.
    if img is False:
        return False

    try:
        with open(filePath, "wb") as f:
            # Have the raw stream undo any content encoding (e.g. gzip) so the
            # bytes written are the actual image data.
            img.raw.decode_content = True

            try:
                shutil.copyfileobj(img.raw, f)
            except Exception:
                return False
    finally:
        # The response was opened with stream=True; close it on every path so
        # the underlying connection is released.
        img.close()

    return True


# Sample image to fetch.
URL = "https://upload.wikimedia.org/wikipedia/commons/b/b6/Image_created_with_a_mobile_phone.png"

# The last path segment of the URL becomes the local file name
# (here: Image_created_with_a_mobile_phone.png).
imageName = URL.rsplit("/", 1)[-1]

# Destination is outputDirectory joined with imageName, i.e.
# C:\Users\Joshua\Documents\Downloaded Media\Image_created_with_a_mobile_phone.png
imagePath = os.path.join(outputDirectory, imageName)

downloadImage(URL, imagePath)