Django bulk_create很慢

时间:2014-09-09 16:26:26

标签: python django

我正在为另一个项目做基准测试,用到了 bulk_create,发现它比我预期的要慢。我运行的是下面这段代码,每生成一天的数据,bulk_create 最多可能要花一分钟。这个示例提到创建 10K 个对象大约只需 1/2 秒,所以我很惊讶:数据量只是 5 倍,耗时却达到 20 倍以上。有没有办法让它更快?

import requests
import random
from fake_model.models import FakeNode, FakeStation, FakePrice, FakeWeather
from datetime import datetime, timedelta
from random import randint

# Build the word list that random_name() draws from: one entry per line of
# the downloaded file.
word_site = "http://www.cs.duke.edu/~ola/ap/linuxwords"
# Bound the wait so an unresponsive host cannot hang the script forever.
response = requests.get(word_site, timeout=30)
# Fail fast on an HTTP error rather than using an error page as the word list.
response.raise_for_status()
WORDS = response.content.splitlines()


def random_name(WORDS):
    """Return a random "First-Second" name shorter than 100 characters.

    Two entries are drawn independently from ``WORDS`` (so the same entry
    may appear twice), title-cased, and joined with a hyphen. If the
    result is 100 characters or longer, a fresh name is drawn instead.
    """
    first = random.choice(WORDS).title()
    second = random.choice(WORDS).title()
    candidate = "{0}-{1}".format(first, second)
    # Too long: discard this attempt and draw again.
    if len(candidate) >= 100:
        return random_name(WORDS)
    return candidate

# Clear out rows from every fake model, in this fixed order.
print("Deleting")
for model in (FakeNode, FakeStation, FakePrice, FakeWeather):
    model.objects.all().delete()

# Build 1000 unsaved nodes and 1000 unsaved stations with random names.
print("Prepping")
node_list = [FakeNode(name=random_name(WORDS)) for _ in range(1000)]
station_list = [FakeStation(name=random_name(WORDS)) for _ in range(1000)]

# Write each list with a single bulk_create call.
print("Making")
FakeNode.objects.bulk_create(node_list)
FakeStation.objects.bulk_create(station_list)

# Date window to generate data for; daterange() stops before end_date.
start_date = datetime(2012, 1, 1)
end_date = datetime(2012, 1, 2)


def daterange(start_date, end_date):
    """Yield ``start_date`` advanced by 0, 1, 2, ... whole days.

    The number of values yielded is the ``days`` component of
    ``end_date - start_date``, so ``end_date`` itself is never produced
    and nothing is yielded when that component is zero or negative.
    """
    total_days = int((end_date - start_date).days)
    offset = 0
    while offset < total_days:
        yield start_date + timedelta(offset)
        offset += 1

# Progress marker: prints the literal text "@profile" before make() is defined and run.
print "@profile"


# NOTE(review): `profile` is neither defined nor imported in this file;
# presumably it is injected by a line profiler (e.g. kernprof) -- confirm.
@profile
def make():
    """Create and bulk-insert fake hourly prices and weather, one day at a time.

    For each date yielded by ``daterange(start_date, end_date)`` this builds
    one ``FakePrice`` per (node, hour) and one ``FakeWeather`` per
    (station, hour), using hours 1 through 24 and a random integer value
    from ``randint(0, 1000)``. Each day's two lists are then written with
    one ``bulk_create`` call apiece, after printing the date as a progress
    marker.

    Reads the module-level ``start_date`` and ``end_date``. Returns ``None``.
    """
    # Both querysets are built once and reused for every date.
    nodes = FakeNode.objects.all()
    stations = FakeStation.objects.all()
    # Hours are numbered 1..24; the sequence does not change per row, so it
    # is built once instead of once per node/station.
    hours = range(1, 25)

    for single_date in daterange(start_date, end_date):
        price_list = [
            FakePrice(
                node=node,
                dt=single_date,
                hr=hr,
                price=randint(0, 1000)
            )
            for node in nodes for hr in hours
        ]
        temp_list = [
            FakeWeather(
                station=station,
                dt=single_date,
                hr=hr,
                temp=randint(0, 1000)
            )
            for station in stations for hr in hours
        ]

        # Single-argument parenthesised print: same output on Python 2,
        # and also valid on Python 3.
        print(single_date)
        # The return values of bulk_create are not needed here.
        FakePrice.objects.bulk_create(price_list)
        FakeWeather.objects.bulk_create(temp_list)

# Run the generator, then report how many FakePrice rows now exist.
make()
from django.db.models import Count
price_total = FakePrice.objects.all().aggregate(Count('id'))
print(price_total)

计时结果:

Total time: 13.8182 s
File: generate.py
Function: make at line 49

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    49                                           @profile
    50                                           def make():
    51         1            4      4.0      0.0      price_list = []
    52         1            2      2.0      0.0      temp_list = []
    53         1          221    221.0      0.0      nodes = FakeNode.objects.all()
    54         1          146    146.0      0.0      stations = FakeStation.objects.all()
    55         2           65     32.5      0.0      for single_date in daterange(start_date, end_date):
    56                                                   price_list = [
    57         1            3      3.0      0.0              FakePrice(
    58                                                           node=node,
    59                                                           dt=single_date,
    60                                                           hr=hr,
    61                                                           price=randint(0, 1000)
    62                                                       )
    63     25001      3373017    134.9     24.4              for node in nodes for hr in range(1, 25)
    64                                                   ]
    65                                                   temp_list = [
    66         1            2      2.0      0.0              FakeWeather(
    67                                                           station=station,
    68                                                           dt=single_date,
    69                                                           hr=hr,
    70                                                           temp=randint(0, 1000)
    71                                                       )
    72     25001      3463471    138.5     25.1              for station in stations for hr in range(1, 25)
    73                                                   ]
    74                                           
    75         1           70     70.0      0.0          print single_date
    76         1      3526035 3526035.0     25.5          c = FakePrice.objects.bulk_create(price_list)
    77         1      3455148 3455148.0     25.0          c = FakeWeather.objects.bulk_create(temp_list)

0 个答案:

没有答案