将蜘蛛名称传递给线程类

时间:2017-04-19 19:50:55

标签: python multithreading scrapy

我正在尝试把线程和 Scrapy 结合起来使用,但弄不清楚自己哪里做错了。我只是想把蜘蛛(spider)传递给线程类,结果却得到了 KeyError。代码如下:

import threading
from PyQt4 import QtCore, QtGui
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor, defer
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from my_spider import QuotesSpider



class CrawlerThread(threading.Thread):
    """Thread that schedules a Scrapy crawl and runs the Twisted reactor.

    NOTE(review): this is the listing that reproduces the KeyError in the
    traceback. ``threading.Thread`` already exposes ``name`` as a property
    whose setter stores ``str(value)``, so the assignment in ``__init__``
    replaces the spider class with its string representation
    (``"<class 'my_spider.QuotesSpider'>"``, as seen in the error message).
    """
    def __init__(self,name):
        super(CrawlerThread,self).__init__() 
        # Goes through Thread's ``name`` property, not a plain attribute.
        self.name = name
    def stop(self):
        """Call ``reactor.stop()`` and ignore any exception it raises."""
        try:
             reactor.stop()
        except:
            pass
    def run(self):
        """Configure logging, schedule the crawl and run the reactor."""
        configure_logging({'LOG_LEVEL': 'DEBUG'})
        crun = CrawlerRunner(get_project_settings())
        # ``self.name`` is read back through the Thread property here.
        crun.crawl(self.name)
        # The reactor is started without installing signal handlers.
        reactor.run(installSignalHandlers=False)


class Form(QtGui.QDialog):
    """Dialog that creates and starts a ``CrawlerThread`` on construction."""


    def __init__(self, parent=None):
        """Initialise the dialog and start the crawler thread.

        parent: forwarded unchanged to the base-class constructor.
        """
        super(Form, self).__init__(parent)
        # NOTE(review): ``w`` is a local only; it is never shown or stored.
        w =QtGui.QWidget()
        w.resize(250, 150)
        w.move(300, 300)
        # The spider class itself (not a name string) is handed to the thread.
        self.ct = CrawlerThread(QuotesSpider)
        self.ct.start()

# Script entry point: build the Qt application, show the dialog and exit
# with the value returned by ``app.exec_()``.
# NOTE(review): ``sys`` is used below, but the import list of this listing
# contains no ``import sys``.
if __name__ == "__main__":
    app = QtGui.QApplication(sys.argv)
    form = Form()
    form.show()


    sys.exit(app.exec_())

错误:

Exception in thread <class 'my_spider.QuotesSpider'>:
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 810, in __bootstrap_inner
    self.run()
  File "/home/python/Python/code/Fresh_lostfilm/movies/movies/spiders/GUI.py", line 26, in run
    crun.crawl(self.name)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 162, in crawl
    crawler = self.create_crawler(crawler_or_spidercls)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 190, in create_crawler
    return self._create_crawler(crawler_or_spidercls)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/crawler.py", line 194, in _create_crawler
    spidercls = self.spider_loader.load(spidercls)
  File "/usr/local/lib/python2.7/dist-packages/scrapy/spiderloader.py", line 51, in load
    raise KeyError("Spider not found: {}".format(spider_name))
KeyError: "Spider not found: <class 'my_spider.QuotesSpider'>"

QuotesSpider来自以下链接: Spiders example

P.S. 如果把 crun.crawl(self.name) 中的 self.name 直接换成 QuotesSpider,程序运行没有任何问题。

1 个答案:

答案 0 :(得分:0)

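看起来,把 name 改成其他不那么常用的属性名就解决了我的问题。原因在于 threading.Thread 本身已经定义了 name 属性,它的 setter 会把赋给它的值转换成字符串(str(name)),所以 Scrapy 收到的是字符串 "<class 'my_spider.QuotesSpider'>" 而不是蜘蛛类本身,于是抛出 KeyError。以下是修改后的代码: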

import threading
from PyQt4 import QtCore, QtGui
from scrapy.crawler import CrawlerRunner
from twisted.internet import reactor, defer
from scrapy.utils.project import get_project_settings
from scrapy.utils.log import configure_logging
from my_spider import QuotesSpider



class CrawlerThread(threading.Thread):
    """Background thread that runs a Scrapy crawl inside the Twisted reactor.

    The spider is stored as ``myname`` rather than ``name`` because
    ``threading.Thread`` already defines ``name`` as a property whose setter
    stores ``str(value)``. Assigning a spider class to it would hand Scrapy
    the string ``"<class '...'>"`` and raise ``KeyError: Spider not found``.
    """

    def __init__(self, myname):
        """Create the thread.

        myname: spider class (or spider name) that is passed to
            ``CrawlerRunner.crawl`` when the thread runs.
        """
        super(CrawlerThread, self).__init__()
        self.myname = myname

    def stop(self):
        """Stop the reactor on a best-effort basis.

        Errors raised by ``reactor.stop()`` are ignored on purpose.
        """
        try:
            reactor.stop()
        except Exception:
            # Deliberately best-effort. Unlike a bare ``except`` this still
            # lets KeyboardInterrupt and SystemExit propagate.
            pass

    def run(self):
        """Configure logging, schedule the crawl and run the reactor."""
        configure_logging({'LOG_LEVEL': 'DEBUG'})
        crun = CrawlerRunner(get_project_settings())
        crun.crawl(self.myname)
        # Python only allows signal handlers to be installed from the main
        # thread, so they are disabled for a reactor run in this thread.
        reactor.run(installSignalHandlers=False)


class Form(QtGui.QDialog):
    """Dialog whose construction launches the crawler thread."""

    def __init__(self, parent=None):
        """Set up the dialog and start a ``CrawlerThread`` for ``QuotesSpider``.

        parent: optional parent, handed to the base-class constructor.
        """
        super(Form, self).__init__(parent)

        # Local helper widget, sized and positioned but kept only as a local.
        widget = QtGui.QWidget()
        widget.resize(250, 150)
        widget.move(300, 300)

        # Keep a reference on the dialog before starting the thread.
        crawler = CrawlerThread(QuotesSpider)
        self.ct = crawler
        crawler.start()

if __name__ == "__main__":
    # ``sys`` is not among the imports at the top of this listing, so bring
    # it into scope here; without it ``sys.argv`` raises NameError.
    import sys

    app = QtGui.QApplication(sys.argv)
    form = Form()
    form.show()

    # Use the value returned by ``app.exec_()`` as the process exit status.
    sys.exit(app.exec_())