利用R语言把某网站的所有那啥妹子都爬了下来!
不管怎么说,先上妹子
## girl crawler
library(RCurl)
library(dplyr)
library(rvest)
library(downloader)
# Crawl the gallery pages <base_url><id> for every id in start:end and save
# each image found under "div.ck-parent-div" as "<id>-<index>.jpg" in the
# working directory. Pages that do not answer with HTTP 200, or whose request
# fails at the transport level, are listed in `failure_url` (column `url`).

# NOTE(review): setwd() in a script is discouraged and fails when the
# directory does not exist; it is kept because the relative output paths
# below (and possibly later code) rely on the working directory.
setwd("E:\\girl")

start <- 24656
end <- 24657
# end <- 25370
base_url <- "https://m.nvshens.com/g/"
myheader <- c(
  "User-Agent" = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
  "Accept" = "*/*",
  "Accept-Language" = "zh-CN,zh;q=0.8",
  "Connection" = "keep-alive"
)

page_ids <- start:end
page_urls <- paste0(base_url, page_ids)
# Preallocated failure flags: avoids growing a data frame inside the loop.
is_failed <- logical(length(page_ids))

for (i in seq_along(page_ids)) {
  url_parameter <- page_ids[i]
  surt_url <- page_urls[i]

  # A fresh handle per request so getCurlInfo() reports this request only.
  curl <- getCurlHandle()
  temp <- tryCatch(
    getURL(surt_url, httpheader = myheader, curl = curl),
    error = function(e) {
      # Transport errors (DNS, timeout, TLS) must not abort the whole crawl.
      message("Request failed for ", surt_url, ": ", conditionMessage(e))
      NULL
    }
  )

  if (!is.null(temp) && getCurlInfo(curl)$response.code == 200) {
    print("有效地址")
    img_url <- temp %>%
      read_html() %>%
      html_nodes("div.ck-parent-div") %>%
      html_nodes("img") %>%
      html_attr("src")

    # seq_along() yields an empty sequence when the page has no images,
    # whereas 1:0 would iterate twice and call download() with NA.
    for (j in seq_along(img_url)) {
      # <img> without a src attribute: nothing to fetch. The index is not
      # compacted so file names keep the image's position on the page.
      if (is.na(img_url[j])) {
        next
      }
      dest_file <- paste0(url_parameter, "-", j, ".jpg")
      tryCatch(
        download(img_url[j], dest_file, mode = "wb"),
        error = function(e) {
          # One broken image should not stop the remaining downloads.
          message(
            "Download failed for ", img_url[j], ": ", conditionMessage(e)
          )
        }
      )
    }
  } else {
    print("无效地址")
    is_failed[i] <- TRUE
  }

  # Throttle requests to one page per second.
  Sys.sleep(1)
}

# Built once after the loop; one row per page that could not be fetched.
failure_url <- data.frame(
  url = page_urls[is_failed],
  stringsAsFactors = FALSE
)
还有更多……