我已经调试过这段代码,但它仍然报错,我不知道该如何解决。
我搜索过类似的问题,但仍然没有找到解决办法。
driver.findElement(By.cssSelector("input#txtKeyword.input-medium.search-query")).sendKeys(emailId + Keys.ENTER);
出现错误的代码如下:
import requests
from bs4 import BeautifulSoup
import time
import json
import os
import sys
# Python 2 only: site.py removes sys.setdefaultencoding at interpreter start-up,
# so sys is reloaded to get the function back, and the implicit str<->unicode
# codec is then switched from ASCII to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
# Page fetched first to start the session, and the URL the login form is
# POSTed to.
url = 'http://www.zhihu.com'
loginURL = 'http://www.zhihu.com/login/email'
# HTTP headers passed as headers= on the session requests made by this script.
headers = {
"User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:41.0) Gecko/20100101 Firefox/41.0',
"Referer": "http://www.zhihu.com/",
'Host': 'www.zhihu.com',
}
# Login form fields. The '_xsrf' and 'captcha' keys are added later, right
# before the POST to loginURL.
# NOTE(review): the e-mail/password values look like placeholders -- replace
# them with real credentials before running.
data = {
'email': 'xxxxxxx1@gmail.com',
'password': 'xxxxxxx',
'rememberme': "true",
}
# Build a requests session that is logged in to zhihu: reuse the cookies saved
# by an earlier run when 'cookiefile' exists, otherwise perform the
# captcha-protected e-mail login and save the resulting cookies.
# print(x) is always called with exactly one argument so that it behaves the
# same as the Python 2 print statement.
s = requests.session()
if os.path.exists('cookiefile'):
    # Restore the cookies written by a previous successful login.
    with open('cookiefile') as f:
        cookie = json.load(f)
    s.cookies.update(cookie)
    req1 = s.get(url, headers=headers)
    # req1.content is the raw response body (bytes), so dump it in binary
    # mode instead of letting a text-mode handle translate it.
    with open('zhihu.html', 'wb') as f:
        f.write(req1.content)
else:
    req = s.get(url, headers=headers)
    print(req)
    soup = BeautifulSoup(req.text, "html.parser")
    # find() returns None when the hidden field is absent; fail with a clear
    # message instead of "'NoneType' object has no attribute 'get'".
    xsrf_input = soup.find('input', {'name': '_xsrf', 'type': 'hidden'})
    if xsrf_input is None:
        raise RuntimeError('hidden _xsrf input not found on ' + url)
    data['_xsrf'] = xsrf_input.get('value')

    # The millisecond timestamp makes every captcha URL unique.
    timestamp = int(time.time() * 1000)
    captchaURL = 'http://www.zhihu.com/captcha.gif?=' + str(timestamp)
    print(captchaURL)
    # Download first, then open the file, so a failed request does not leave
    # an empty zhihucaptcha.gif behind.
    captchaREQ = s.get(captchaURL)
    with open('zhihucaptcha.gif', 'wb') as f:
        f.write(captchaREQ.content)

    # The user opens zhihucaptcha.gif and types in what it shows.
    loginCaptcha = raw_input('input captcha:\n').strip()
    data['captcha'] = loginCaptcha
    # NOTE(review): this echoes the whole form, password included, to stdout.
    print(data)
    loginREQ = s.post(loginURL, headers=headers, data=data)
    print(loginREQ.url)

    session_cookies = s.cookies.get_dict()
    print(session_cookies)
    # Written in text mode, matching how the file is read back above.
    with open('cookiefile', 'w') as f:
        json.dump(session_cookies, f)
# Page through the voters of one answer, advancing the offset by 10 per
# request, and print each voter's nickname and profile URL followed by the
# number of voters printed.
# http://www.zhihu.com/question/27621722/answer/48820436.
zanBaseURL = 'http://www.zhihu.com/answer/14926794/voters_profile?&offset={0}'
page = 0
count = 0
while True:
    zanURL = zanBaseURL.format(page)
    page += 10
    zanREQ = s.get(zanURL, headers=headers)
    zanData = zanREQ.json()['payload']
    # An empty payload means there are no more voters to fetch.
    if not zanData:
        break
    for item in zanData:
        # Each payload item is an HTML fragment; the profile link is the
        # <a target="_blank" class="zg-link"> element inside it.
        zansoup = BeautifulSoup(item, "html.parser").find(
            'a', {'target': "_blank", 'class': 'zg-link'})
        if zansoup is None:
            # find() returns None when the fragment has no such link
            # (presumably anonymous voters -- not confirmed); calling .get()
            # on it would raise AttributeError, so skip the entry.
            print('skipped a voter entry without a profile link')
            continue
        # One formatted line; same text as the former pair of comma-joined
        # Python 2 print statements (three spaces between the two fields).
        print('nickname: %s   person_url: %s'
              % (zansoup.get('title'), zansoup.get('href')))
        count += 1
print(count)
答案 0 :(得分:0)
摘自文档(docs):
如果find()找不到任何内容,则返回None
这正是出现该错误的原因:None(NoneType 对象)没有 get 方法,
对它调用 .get() 会抛出 AttributeError。
因此,您需要在使用返回值之前,
先处理它可能为 None 的情况:
# find() returns None when no matching <a> element exists, so test for that
# explicitly before calling .get() on the result.
if zansoup is not None:
    # One formatted line; same text as the two comma-joined Python 2 print
    # statements it replaces (three spaces between the two fields).
    print('nickname: %s   person_url: %s'
          % (zansoup.get('title'), zansoup.get('href')))
else:
    print('there was an error ...')