Android 使用Jsoup爬取码云开源项目
2019-07-19 本文已影响7人
iot_xc
先放一下官方文档
implementation 'org.jsoup:jsoup:1.12.1'
Jsoup可以把网页解析成Document对象,然后我们根据对应的元素id或者class以及其他的属性,获取对应的信息
几个重要且常用的方法:
connect(url): 从URL获取HTML来解析
getElementById(String id):通过id来获取
getElementsByTag(String tagName):通过标签名字来获取
getElementsByClass(String className):通过类名来获取
getElementsByAttribute(String key):通过属性名字来获取
getElementsByAttributeValue(String key, String value):通过指定的属性名字,属性值来获取
getAllElements():获取所有元素
select(String cssQuery):通过类似于css或jQuery的选择器来查找元素
话不多说,直接上教程
进入码云->移动开发
按F12查看网页内容,按shift+ctrl+C选中列表可以在右边查看到对应的信息
dev_html.jpg
每个元素的属性都清楚后,开始写代码解析
根据上面分析的信息先建个文章实体类
/**
 * One project entry scraped from the listing page.
 *
 * @property avatar avatar image URL
 * @property title title
 * @property label category
 * @property watchers number of watchers
 * @property collect number of collects (favorites)
 * @property forkNum number of forks
 * @property desc description
 * @property date time text
 * @property urlLink link to open for this entry
 */
data class ArticleBean(
    val avatar: String,
    val title: String,
    val label: String,
    val watchers: Int,
    val collect: Int,
    val forkNum: Int,
    val desc: String,
    val date: String,
    val urlLink: String
) : Serializable
然后使用上面提到的connect方法去获取document
// Open a connection to the Gitee "mobile-develop" explore page, then fetch and parse it.
val url = "https://gitee.com/explore/mobile-develop?order=recommend"
val connection = Jsoup.connect(url)
val document = connection.get()
获取到网页内容后开始解析它,根据上面的截图可以看到,列表的内容被一个div包裹起来了,这个div的class是ui relaxed divided items explore-repo__list,那我们就根据这个class去获取我们需要的文章,注意:在填写class时要将中间的空格改成"."
// Narrow down to the list container first, then pick every project row inside it.
val listContainer = document.select("div.ui.relaxed.divided.items.explore-repo__list")
val elements = listContainer.select("div.item")
如果这个元素没有class,但是有id,就要用"#"加id来选择:把标签名后面的"."(不是上面说的空格)改为"#",例如:
// Id variant: "div#ui" matches <div id="ui">; the ".xxx" parts that follow are still class filters.
val containerById = document.select("div#ui.relaxed.divided.items.explore-repo__list")
val elements = containerById.select("div.item")
article_item.jpg
下面就是重复上面的步骤,根据class或者id,获取我们需要的数据,一层层解析就好了
// Convert every project row into an ArticleBean and append it to `articles`.
elements.forEach { item ->
    val content = item.select("div.content")
    val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
    // The <a> links inside the "meta-social" block; read positionally as watchers, collects, forks.
    val social = content.select("div.explore-project__meta-social.pull-right").select("a")

    // A missing link or a non-numeric counter falls back to 0 instead of throwing,
    // so one odd row cannot abort parsing of the whole list.
    fun count(index: Int): Int = social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0

    articles.add(
        ArticleBean(
            creatorLink.select("img").attr("src"),
            content.select("a.title.project-namespace-path").text(),
            content.select("span>a").text(),
            count(0),
            count(1),
            count(2),
            item.select("div.project-desc").text(),
            item.select("div.project-latest").text(),
            // The href is concatenated onto baseUrl to form the link that is opened.
            baseUrl + creatorLink.attr("href")
        )
    )
}
Video_20190719_031346_446.gif
MainActivity:
/**
 * Lists the projects scraped from Gitee's "mobile-develop" explore page.
 *
 * The page is downloaded and parsed with Jsoup on a worker thread. The parsed
 * [ArticleBean]s are appended to the adapter's backing list on the UI thread,
 * so the list is never modified while the RecyclerView may be reading it.
 */
class MainActivity : AppCompatActivity(), ArticleAdapter.OnItemClick {

    private val articles = ArrayList<ArticleBean>()
    private val mAdapter: ArticleAdapter by lazy {
        ArticleAdapter(articles, this)
    }
    private val baseUrl = "https://gitee.com"

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        recyclerView.run {
            mAdapter.setOnItenClick(this@MainActivity)
            adapter = mAdapter
            layoutManager = LinearLayoutManager(this@MainActivity)
            addItemDecoration(DividerItemDecoration(this@MainActivity, DividerItemDecoration.VERTICAL))
        }
        // Jsoup's get() is a blocking network call, so it runs on its own thread.
        Thread(Runnable { jsoupData() }).start()
    }

    /**
     * Downloads the listing page, converts every project row into an [ArticleBean]
     * and publishes the result to the adapter on the UI thread.
     *
     * Any failure (network, parsing) is logged and leaves the list unchanged.
     */
    private fun jsoupData() {
        val url = "https://gitee.com/explore/mobile-develop?order=recommend"
        try {
            val document = Jsoup.connect(url).get()
            val elements = document.select("div.ui.relaxed.divided.items.explore-repo__list").select("div.item")

            // Build the result in a local list; the shared list is only touched on the UI thread.
            val parsed = elements.map { item ->
                val content = item.select("div.content")
                val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
                // The <a> links inside the "meta-social" block; read positionally as watchers, collects, forks.
                val social = content.select("div.explore-project__meta-social.pull-right").select("a")

                // A missing link or a non-numeric counter falls back to 0 instead of
                // throwing and discarding the whole page.
                fun count(index: Int): Int = social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0

                ArticleBean(
                    creatorLink.select("img").attr("src"),
                    content.select("a.title.project-namespace-path").text(),
                    content.select("span>a").text(),
                    count(0),
                    count(1),
                    count(2),
                    item.select("div.project-desc").text(),
                    item.select("div.project-latest").text(),
                    baseUrl + creatorLink.attr("href")
                )
            }

            runOnUiThread {
                articles.addAll(parsed)
                mAdapter.notifyDataSetChanged()
            }
        } catch (e: Exception) {
            // e.message may be null and Log.e rejects a null message, so fall back to
            // toString() and pass the throwable so the stack trace is logged too.
            Log.e("error--->", e.message ?: e.toString(), e)
        }
    }

    /** Opens [WebviewActivity] for the clicked row, passing the bean as the "articleBean" extra. */
    override fun onClick(articleBean: ArticleBean, position: Int) {
        val intent = Intent(this, WebviewActivity::class.java)
        intent.putExtra("articleBean", articleBean)
        startActivity(intent)
    }
}
adapter:
/**
 * RecyclerView adapter that renders one row per [ArticleBean].
 *
 * @property items rows to display
 * @property content context handed to Glide when loading avatars
 */
class ArticleAdapter(var items: List<ArticleBean>, val content: Context) :
    RecyclerView.Adapter<ArticleAdapter.ViewHolder>() {

    private var onItemClick: OnItemClick? = null

    /**
     * Registers the callback invoked when a row is clicked.
     * The misspelled name is kept because existing callers use it.
     */
    fun setOnItenClick(onItemClick: OnItemClick) {
        this.onItemClick = onItemClick
    }

    override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): ViewHolder {
        val view = LayoutInflater.from(parent.context).inflate(R.layout.article_item_layout, parent, false)
        val holder = ViewHolder(view)
        // Always install the view listener and look the callback up at click time, so a
        // callback registered after this holder was created still receives clicks.
        holder.itemView.setOnClickListener {
            val position = holder.adapterPosition
            // adapterPosition can be NO_POSITION (-1), and items may have been replaced;
            // getOrNull ignores clicks that no longer map to a row.
            items.getOrNull(position)?.let { articleBean ->
                onItemClick?.onClick(articleBean, position)
            }
        }
        return holder
    }

    override fun getItemCount(): Int = items.size

    override fun onBindViewHolder(holder: ViewHolder, position: Int) {
        val item = items[position]
        Glide.with(content)
            .load(item.avatar)
            .apply(RequestOptions.circleCropTransform())
            .into(holder.ivAvatar)
        holder.tvTitle.text = item.title
        holder.tvContent.text = item.desc
        holder.tvWatchers.text = item.watchers.toString()
        holder.tvCollect.text = item.collect.toString()
        holder.tvFork.text = item.forkNum.toString()
        holder.tvDate.text = item.date
    }

    /** Caches the child views of one row of R.layout.article_item_layout. */
    class ViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        val ivAvatar: ImageView = itemView.findViewById(R.id.iv_avatar)
        val tvTitle: TextView = itemView.findViewById(R.id.tv_title)
        val tvContent: TextView = itemView.findViewById(R.id.tv_content)
        val tvWatchers: TextView = itemView.findViewById(R.id.tv_watchers)
        val tvCollect: TextView = itemView.findViewById(R.id.tv_collect)
        val tvFork: TextView = itemView.findViewById(R.id.tv_fork)
        val tvDate: TextView = itemView.findViewById(R.id.tv_date)
    }

    /** Row-click callback. */
    interface OnItemClick {
        /** Called with the clicked row's bean and its adapter position. */
        fun onClick(articleBean: ArticleBean, position: Int)
    }
}
有什么不懂的,留言告诉我