我不断收到此错误。当我使用下面的代码将值附加到项目时。注意,我没有包含蜘蛛类,start_requests函数或第一个解析函数,但我认为那里没有错误,因为它们都可以正常运行。
from folder.items import fd
import scrapy
from bs4 import BeautifulSoup
def parsenestedgame(self, response):
nested_url = response.url
nested_html = response.text
nested_soup = BeautifulSoup(nested_html, "html.parser")
odds_results = (nested_soup.find_all("button", "_32ge9K w0u0II _2D_dGb"))
team_results = (nested_soup.find_all("span", "_2I4p8L"))
sports = ["Basketball", "Hockey",
"Major League Baseball",
"Tennis", "Boxing / MMA",
"American Football"]
sportsurls = ["url1", "url2",
"url3",
"url4", "url5",
"url6"]
sportsdict = dict(zip(sportsurls, sports))
for key in sportsdict:
if key in nested_url:
sport = sportsdict[key]
teams_list = []
odds_list = []
for result in odds_results:
odds_list.append(result.text)
for team in team_results:
teams_list.append(team.text)
if not teams_list:
print(f"There were no teams found at {nested_url}.")
else:
game = f"{teams_list[0]} vs {teams_list[1]}"
bookie_list = ["bookie1", "bookie2", "bookie3", "bookie4", "bookie5", "bookie6", "bookie7",
"bookie8", "bookie10", "bookie11", "bookie12", "bookie13", "bookie14",
"bookie15", "bookie16", "bookie17",
"bookie18", "bookie19"]
if len(odds_list) == 18:
odds_list1 = [odds_list[0], odds_list[1], odds_list[2], odds_list[3], odds_list[4],
odds_list[5], odds_list[6],
odds_list[7], odds_list[8]]
odds_list2 = [odds_list[9], odds_list[10], odds_list[11], odds_list[12], odds_list[13],
odds_list[14],
odds_list[15], odds_list[16], odds_list[17]]
odds_list1P = [1, 2, 3, 4, 5, 6, 7, 8, 9]
odds_list2P = [1, 2, 3, 4, 5, 6, 7, 8, 9]
x, y = 0, 0
for entries1 in odds_list1:
if not entries1:
odds_list1[x] = 0
odds_list1P[x] = 0
else:
odds_list1[x] = int(entries1)
entries1 = float(entries1)
if entries1 > 0:
odds_list1P[x] = ((entries1 / 100) + 1)
if entries1 < 0:
odds_list1P[x] = ((100 / abs(entries1)) + 1)
# odds_list1P[x] = (abs(int(entries1)))/(abs(int(entries1))+100)
x += 1
for entries2 in odds_list2:
if not entries2:
odds_list2[y] = 0
odds_list2P[y] = 0
else:
odds_list2[y] = int(entries2)
entries2 = float(entries2)
if entries2 > 0:
odds_list2P[y] = ((entries2 / 100) + 1)
if entries2 < 0:
odds_list2P[y] = ((100 / abs(entries2)) + 1)
# odds_list2P[y] = (abs(int(entries2)))/(abs(int(entries2))+100)
y += 1
# I now have all the lists I need to pass to items
print(type(game))
print(type(teams_list[0]))
print(type(teams_list[1]))
print(type(odds_list1[0]))
print(type(odds_list2[0]))
print(type(odds_list1P[0]))
print(type(odds_list2P[0]))
print(type(bookie_list[0]))
print(type(sport))
fd = fd()
fd["game"] = game
fd["team1"] = teams_list[0]
fd["team2"] = teams_list[1]
fd["odds1a"] = odds_list1[0]
fd["odds2a"] = odds_list2[0]
fd["odds1d"] = odds_list1P[0]
fd["odds2d"] = odds_list2P[0]
fd["bookie"] = b_list[0]
fd["sport"] = sport
yield fd
这是我下面的物品。
import scrapy
class fd(scrapy.Item):
game = scrapy.Field()
team1 = scrapy.Field()
team2 = scrapy.Field()
odds1a = scrapy.Field()
odds2a = scrapy.Field()
odds1d = scrapy.Field()
odds2d = scrapy.Field()
bookie = scrapy.Field()
sport = scrapy.Field()
当我在上面的代码行的前面打印出类型时,您会得到这个。
输出
<class 'str'>
<class 'str'>
<class 'str'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'int'>
<class 'str'>
<class 'str'>
2020-06-11 16:58:26 [scrapy.core.scraper] ERROR: Spider must return Request, BaseItem, dict or None, got 'tuple' in <GET URL>
我不明白,请帮助!如果需要,我将提供更多代码。
答案 0 :(得分:0)
我相信您在代码中调用
import matplotlib.pyplot as plt
import random as rand
from pylab import *
类时会发生错误:
folder.item
您要使用与fd = fd()
类相同的方式来命名变量,我建议重命名该变量:
folder.item
当然,这意味着您还必须更改代码的以下部分:
item_data = fd()
希望这将为您指明正确的方向。另外,我不确定您为什么同时使用item_data["game"] = game
item_data["team1"] = teams_list[0]
item_data["team2"] = teams_list[1]
.
.
.
item_data["sport"] = sport
yield fd(**item_data)
和beautiful soup
,据我所知,如果有特定原因,我很高兴知道关于它的更多信息。请随时提出任何问题! :D