JAVA版Selenium爬虫初体验
2019-09-26 本文已影响0人
十九贝勒
Selenium用浏览器爬取数据,感觉很cool啊。趁有空赶紧写了一个hello world!
1 下载浏览器
firefox浏览器
chrome浏览器
需要本地已安装firefox或者chrome;使用chrome时还需要下载与浏览器版本匹配的chromedriver(下文代码中的路径为 F://wb//chromedriver.exe)
2 动手写java
maven pom.xml中引用Selenium
<!-- Selenium Java client bindings (By, WebElement, ChromeDriver, ...), pinned to 4.0.0-alpha-2. -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>4.0.0-alpha-2</version>
</dependency>
<!-- Standalone server artifact, pinned to 2.53.0. -->
<!-- NOTE(review): this 2.x artifact sits next to a 4.x alpha client; confirm it is needed and does not conflict on the classpath. -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-server-standalone</artifactId>
<version>2.53.0</version>
</dependency>
写helloworld类
/**
 * Opens a YY channel page in Chrome and submits the account/password login form.
 *
 * <p>Flow: start a ChromeDriver session, load the channel URL, click the header
 * login button, switch to the account/password login tab, enter the login
 * iframe, type the credentials and click the button whose text is "登录".
 *
 * <p>NOTE(review): the driver is created locally and never quit, so the browser
 * window stays open after this method returns; confirm that is intended.
 *
 * @param username account name typed into the account input
 * @param password password typed into the password input
 * @throws IllegalArgumentException if {@code username} or {@code password} is null
 */
public static void loginYY(String username, String password) {
    // Fail fast, before a browser process is launched, instead of failing later at sendKeys.
    if (username == null || password == null) {
        throw new IllegalArgumentException("username and password must not be null");
    }

    System.setProperty("webdriver.chrome.driver", "F://wb//chromedriver.exe");
    ChromeOptions options = new ChromeOptions();
    ChromeDriver webDriver = new ChromeDriver(options);

    // The implicit wait applies to element lookups (not to page load). It is
    // configured once here, before the first lookup, rather than being re-set
    // with the same value after individual steps.
    webDriver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);

    String url = "http://www.yy.com/98187265/98187265";
    webDriver.get(url);

    // Find the login button in the page header and click it.
    webDriver.findElement(By.xpath("//a[@class='w-head__main__cnt__right__item__tl__t wHead__login__btn']")).click();
    // Switch to the account/password login tab.
    webDriver.findElement(By.xpath("//a[@id='back2Commonlogin-btn']")).click();

    // The login form is looked up inside an iframe, so switch the driver's context into it.
    WebElement iframe = webDriver.findElement(By.className("udbsdk_frm"));
    webDriver.switchTo().frame(iframe);

    // Type the account name.
    webDriver.findElement(By.xpath("//input[@class='placeholder E_acct']")).sendKeys(username);
    // Type the password.
    webDriver.findElement(By.xpath("//input[@class='placeholder E_passwd']")).sendKeys(password);

    List<WebElement> elements = webDriver.findElements(By.xpath("//div[@class='form_item form_opra']/a[@class='m_button_large E_login']"));
    for (WebElement w : elements) {
        if ("登录".equals(w.getText())) {
            // Click the login button and stop: after the click the page may change,
            // and touching the remaining elements could hit stale references.
            w.click();
            break;
        }
    }
}