我尝试使用 SplashRequest 并设置 endpoint='execute',通过以下代码发送 POST 请求,但结果显示请求没有成功。
import re
import sys
import os
import scrapy
from scrapy_splash import SplashRequest
from crawler.items import CrawlerItem
class Exp10itSpider(scrapy.Spider):
    """Spider that sends a Splash ``execute`` request for each ``url^body`` entry."""

    name = "test"

    # The script loads args.url, waits 0.5 s and returns the page HTML.
    # NOTE: it never reads args.http_method or args.body, so those Splash
    # arguments are not forwarded to splash:go by this script.
    lua_script = """
function main(splash, args)
assert(splash:go(args.url))
assert(splash:wait(0.5))
return splash:html()
end
"""

    def start_requests(self):
        """Yield one SplashRequest per entry that contains a ``^`` separator.

        The text before the first ``^`` is used as the target URL and the
        text after it as the request body passed in the Splash arguments.
        """
        urls = [
            'https://httpbin.org/post^sss=lalala'
        ]
        for entry in urls:
            # Entries without the separator carry no body and are skipped.
            if "^" not in entry:
                continue
            pieces = entry.split("^")
            target, payload = pieces[0], pieces[1]
            splash_args = {
                'lua_source': self.lua_script,
                'http_method': 'POST',
                'body': payload,
            }
            yield SplashRequest(
                target,
                self.parse,
                endpoint='execute',
                magic_response=True,
                meta={'handle_httpstatus_all': True},
                args=splash_args,
            )

    def parse(self, response):
        """Wait for the user to press Enter, then print the raw response body."""
        input("body is:")
        print(response.body)
但是,POST 请求并没有成功,返回的内容显示我提交的数据不正确。请问可以通过 scrapy_splash 中的 SplashRequest 发送 POST 请求吗?
答案 0 :(得分:2)
处理 POST 请求需要 Splash 1.8 及以上版本;在更早的 Splash 版本中,http_method 和 body 参数会被忽略。如果您使用 /execute 端点并希望支持 POST 请求,则必须在 Lua 脚本中手动处理 http_method 和 body 参数。
另外,文档中也有 POST 请求的示例。下面是一个可以直接运行的最小示例:
import scrapy
from scrapy_splash import SplashRequest
class SplashPostSpider(scrapy.Spider):
    """Minimal spider that sends a POST request through the Splash ``execute`` endpoint."""

    name = "splash_post"

    # The script passes the url, http_method and body Splash arguments to
    # splash:go, waits 0.5 s, and returns a table holding the page HTML.
    lua_script = """
function main(splash, args)
assert(splash:go{
splash.args.url,
http_method=splash.args.http_method,
body=splash.args.body,
})
assert(splash:wait(0.5))
return {
html = splash:html(),
}
end
"""

    def start_requests(self):
        """Yield a single SplashRequest that posts ``foo=bar`` to httpbin."""
        post_url = 'https://httpbin.org/post'
        post_data = 'foo=bar'
        # http_method and body are plain Splash arguments; the Lua script
        # above is what hands them to splash:go.
        splash_args = {
            'lua_source': self.lua_script,
            'http_method': 'POST',
            'body': post_data,
        }
        yield SplashRequest(
            post_url,
            self.parse,
            endpoint='execute',
            magic_response=True,
            meta={'handle_httpstatus_all': True},
            args=splash_args,
        )

    def parse(self, response):
        """Print the raw response body."""
        print(response.body)