我正在用Java创建简单的采集卡供我使用。总的来说,我想做的是:
那是我的代码
package com.amazon.tests;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class Grabber {
private WebDriver driver;
private List<String> internalLinks = new ArrayList<String>();
private Set<String> noDuplicates = new HashSet<>();
private List<WebElement> linksOnPage = new ArrayList<>();
private List<String> listOld = new ArrayList<>();
private List<String> listNew = new ArrayList<>();
private List<String> listTemp = new ArrayList<>();
@BeforeClass
public void setup() {
System.setProperty("webdriver.chrome.driver","C:/TEST/LIB/chromedriver.exe");
driver = new ChromeDriver();
}
@AfterClass
public void teardown() {
driver.close();
}
@Test
public void grabber()
{
String hrefvalue = null;
boolean check = false;
driver.get("https://www.redbullmobile.pl/");
linksOnPage = driver.findElements(By.tagName("a"));
for (int i = 0; i<linksOnPage.size(); i++)
{
hrefvalue = linksOnPage.get(i).getAttribute("href");
if(hrefvalue != null)
{
if(hrefvalue.contains("redbullmobile") && !hrefvalue.contains("facebook") && !hrefvalue.contains("redbullmobile.play.pl") && !hrefvalue.contains("redbullmobilecollect") && !hrefvalue.contains("fb.com") && !hrefvalue.contains(".pdf"))
{
noDuplicates.add(linksOnPage.get(i).getAttribute("href"));
}
}
}
listOld.addAll(noDuplicates);
hrefvalue = null;
linksOnPage.clear();
noDuplicates.clear();
do
{
listTemp.clear();
for (int i = 0; i<listOld.size(); i++)
{
driver.get(listOld.get(i));
linksOnPage = driver.findElements(By.tagName("a"));
for (int i1 = 0; i1<linksOnPage.size(); i1++)
{
hrefvalue = linksOnPage.get(i1).getAttribute("href");
if(hrefvalue != null)
{
if(hrefvalue.contains("redbullmobile") && !hrefvalue.contains("facebook") && !hrefvalue.contains("redbullmobile.play.pl") && !hrefvalue.contains("redbullmobilecollect") && !hrefvalue.contains("fb.com") && !hrefvalue.contains(".pdf"))
{
noDuplicates.add(linksOnPage.get(i1).getAttribute("href"));
}
}
}
}
//When all links from listOld are visited compare listOld and listNew, every link listNew that's not on List is added to listTemp.
//Links from ListTemp and ListOld are added to ListFinal
internalLinks.addAll(listOld);
internalLinks.addAll(listTemp);
//listOld and listNew are cleared, listTemp content is added to listOld
listOld.clear();
listNew.clear();
listOld.addAll(listTemp);
//Go back to 3rd step
//Program end when at then end of 6th step listTemp = null (so simple do...while with check, check changes to true when at the end of 6th step listTemp = null)
if (listTemp.isEmpty())
{
check = true;
}
}while(check);
}
}
但是我在步骤6上遇到问题。我真的不知道如何将listOld
上的每个元素与listNew
上的每个元素进行比较,然后将不匹配的元素添加到listTemp
。有人可以帮我吗?
我还认为我可以通过在{{1}开始之前将do...while
添加到https://www.redbullmobile.pl/
来缩短这段代码(基本上消除(listOld
开始之前的代码) }。我是对的还是错过了什么?
答案 0 :(得分:0)
如您所知,List包含一个addAll
方法,用于将列表的所有元素添加到第一个元素。
它还包含一个removeAll
method,用于执行相反操作。
布尔值removeAll(Collection c)
从此列表中删除指定集合中包含的所有元素