在python中使用Selenium Webdriver导出多个扫描文档

时间:2019-07-18 12:14:05

标签: python selenium-webdriver

我需要通过python中的Selenium Webbdriver导出多次扫描上传的文档。根据我的代码,我可以单击包含扫描文档的第一个“查看”按钮。请建议保存包含扫描文档的多个视图按钮。对于第一次单击“查看”按钮和单击“多个”按钮并保存文档,我已完成以下代码。

enter code here

  from selenium import webdriver
  from selenium.webdriver.common.by import By
  from selenium.webdriver.support.ui import WebDriverWait
  from selenium.webdriver.support import expected_conditions as EC
  from selenium.webdriver.common.keys import Keys
  import urllib.request
  from bs4 import BeautifulSoup
  import os
  from selenium.webdriver.support.select import Select
  import time
  import pandas as pd
  url = 'https://maharerait.mahaonline.gov.in'
  chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

  driver = webdriver.Chrome(executable_path=chrome_path)
  driver.get(url)
  WebDriverWait(driver, 
    20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='search- 
    pro-details']//a[contains(.,'Search Project Details')]"))).click()
  Registered_Project_radio= 
     WebDriverWait(driver,
     10).until(EC.element_to_be_clickable((By.ID,"Promoter")))

  driver.execute_script("arguments[0].click();",Registered_Project_radio)
  Application = driver.find_element_by_id("CertiNo")
  Application.send_keys("P50500000005")
  Search = WebDriverWait(driver, 
      10).until(EC.element_to_be_clickable((By.ID,"btnSearch")))
  driver.execute_script("arguments[0].click();",Search)
  View = [item.get_attribute('href') for item in 
       driver.find_elements_by_tag_name("a") if
  item.get_attribute('href') is not None]
  View = View[0]
  request = urllib.request.Request(View)
  driver.get(View)
  html = urllib.request.urlopen(request).read()
  soup = BeautifulSoup(html, 'html.parser')
  divPInfo = soup.find("div", {"id": "DivDocument"})
  title = divPInfo.find("div", {'class': 'x_panel'}, 
     recursive=False).find("div", {'class': 'x_title'}).find(
              "h2").text.strip()
 print(title)
 with open("uploads.csv" , "a") as csv_file:
      csv_file.write(title + "\n")
      csv_file.close()    
 table = pd.read_html(driver.page_source)[11]                 
 print(table)
 table.to_csv("uploads.csv" , sep=',',index = False)
 btn = WebDriverWait(driver, 
    20).until(EC.element_to_be_clickable((By.XPATH, "//button[@class='btn 
    btn-info btn-xs' and @id='btnShow_10']")))
 driver.execute_script("arguments[0].click();",btn)
 # get tag <object>
 obj = driver.find_element_by_tag_name('object')

 # get `data=`
 data = obj.get_attribute('data')
 # get text after `base64,`
 text = data.split(',')[1]

 # encode text to PDF's content (as bytes)
 import base64
 #bytes = base64.b64decode(text)

 # save bytes in file
 with open('output1.pdf', 'wb') as fp:
    fp.write(data)

0 个答案:

没有答案