Android开发Android开发Android开发经验谈

Android 使用Jsoup爬取码云开源项目

2019-07-19  本文已影响7人  iot_xc

先放一下官方文档

implementation 'org.jsoup:jsoup:1.12.1'

Jsoup可以把网页解析成Document对象,然后我们根据对应的元素id或者class以及其他的属性,获取对应的信息

几个重要且常用的方法:
 connect(url): 从URL获取HTML来解析
 getElementById(String id):通过id来获取
 getElementsByTag(String tagName):通过标签名字来获取
 getElementsByClass(String className):通过类名来获取
 getElementsByAttribute(String key):通过属性名字来获取
 getElementsByAttributeValue(String key, String value):通过指定的属性名字,属性值来获取
 getAllElements():获取所有元素
 select(String cssQuery):通过类似于css或jQuery的选择器来查找元素

话不多说,直接上教程

进入码云->移动开发

码云.jpg
按F12查看网页内容,按shift+ctrl+C选中列表可以在右边查看到对应的信息
dev_html.jpg
每个元素的属性都清楚后,开始写代码解析
根据上面分析的信息先建个文章实体类
/**
 * One project entry scraped from the list page.
 *
 * @property avatar avatar image URL
 * @property title project title
 * @property label category label
 * @property watchers number of watchers
 * @property collect number of times the project was collected (starred)
 * @property forkNum number of forks
 * @property desc project description
 * @property date date/time text shown for the project
 * @property urlLink link opened when the entry is clicked
 */
data class ArticleBean(
    val avatar: String,
    val title: String,
    val label: String,
    val watchers: Int,
    val collect: Int,
    val forkNum: Int,
    val desc: String,
    val date: String,
    val urlLink: String
) : Serializable

然后使用上面提到的connect方法去获取document

// Address of the Gitee explore page to scrape (mobile-develop category, order=recommend).
val url = "https://gitee.com/explore/mobile-develop?order=recommend"
// Download the page and let Jsoup parse it into a Document that can be queried with selectors.
val document = Jsoup.connect(url).get()

获取到网页内容后开始解析它,根据上面的截图可以看到,列表的内容被一个div包裹起来了,这个div的class是ui relaxed divided items explore-repo__list,那我们就根据这个class去获取我们需要的文章,注意:在填写class时要将中间的空格改成"."

// Find the list container by its classes (each space in the class attribute becomes a "."),
// then pick every div.item inside it: one element per project entry.
val elements = document.select("div.ui.relaxed.divided.items.explore-repo__list").select("div.item")

如果这个元素没有class,但是有id,这时候就应该在标签名后面用"#"加上id来选择(而不是用"."),例如:

// Illustration of selecting by id: "#" introduces an id, so this selector matches a div whose
// id is "ui" and that also carries the remaining names as classes.
val elements = document.select("div#ui.relaxed.divided.items.explore-repo__list").select("div.item")
article_item.jpg

下面就是重复上面的步骤,根据class或者id,获取我们需要的数据,一层层解析就好了

// Convert every list entry into an ArticleBean and append it to `articles`.
elements.forEach { item ->
    val content = item.select("div.content")
    val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
    // The <a> elements inside the social block; read by position as watchers, collect, fork.
    val social = content.select("div.explore-project__meta-social.pull-right").select("a")

    // A missing link or a non-numeric counter yields 0 instead of throwing,
    // so one odd entry cannot abort the whole loop.
    fun count(index: Int): Int = social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0

    articles.add(
        ArticleBean(
            creatorLink.select("img").attr("src"),
            content.select("a.title.project-namespace-path").text(),
            content.select("span>a").text(),
            count(0),
            count(1),
            count(2),
            item.select("div.project-desc").text(),
            item.select("div.project-latest").text(),
            baseUrl + creatorLink.attr("href")
        )
    )
}
Video_20190719_031346_446.gif

MainActivity:

/**
 * Main screen: downloads the Gitee explore page with Jsoup, converts every list entry
 * into an [ArticleBean] and shows the result in a RecyclerView. Clicking an entry opens
 * [WebviewActivity] with the selected bean.
 */
class MainActivity : AppCompatActivity(), ArticleAdapter.OnItemClick {
    // Backing list handed to the adapter; it is only modified on the UI thread.
    private val articles = ArrayList<ArticleBean>()
    private val mAdapter: ArticleAdapter by lazy {
        ArticleAdapter(articles, this)
    }

    // Prepended to each project's href to build the link stored in ArticleBean.urlLink.
    private val baseUrl = "https://gitee.com"

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        recyclerView.run {
            mAdapter.setOnItenClick(this@MainActivity)
            adapter = mAdapter
            layoutManager = LinearLayoutManager(this@MainActivity)
            addItemDecoration(DividerItemDecoration(this@MainActivity, DividerItemDecoration.VERTICAL))
        }

        // The download is started on a separate thread rather than inside onCreate itself.
        Thread { jsoupData() }.start()
    }

    /**
     * Fetches and parses the explore page, then publishes the parsed entries to the adapter.
     *
     * Parsing builds a local list on the calling thread; the adapter's backing list is only
     * changed inside [runOnUiThread], together with the change notification, so the
     * RecyclerView never observes a list that changed without being notified.
     * Any failure is logged and leaves the list untouched.
     */
    private fun jsoupData() {
        val url = "https://gitee.com/explore/mobile-develop?order=recommend"

        try {
            val document = Jsoup.connect(url).get()
            val parsed = document
                .select("div.ui.relaxed.divided.items.explore-repo__list")
                .select("div.item")
                .map { item ->
                    val content = item.select("div.content")
                    val creatorLink = item.select("a.project-creator-link.ui.avatar.image.pull-left")
                    // The <a> elements inside the social block; read by position as
                    // watchers, collect, fork.
                    val social = content.select("div.explore-project__meta-social.pull-right").select("a")

                    // A missing link or a non-numeric counter yields 0 instead of throwing,
                    // so one odd entry cannot discard the whole page.
                    fun count(index: Int): Int =
                        social.getOrNull(index)?.text()?.trim()?.toIntOrNull() ?: 0

                    ArticleBean(
                        creatorLink.select("img").attr("src"),
                        content.select("a.title.project-namespace-path").text(),
                        content.select("span>a").text(),
                        count(0),
                        count(1),
                        count(2),
                        item.select("div.project-desc").text(),
                        item.select("div.project-latest").text(),
                        baseUrl + creatorLink.attr("href")
                    )
                }
            runOnUiThread {
                articles.addAll(parsed)
                mAdapter.notifyDataSetChanged()
            }
        } catch (e: Exception) {
            // e.message may be null; fall back to the exception's string form and keep the stack trace.
            Log.e("error--->", e.message ?: e.toString(), e)
        }
    }

    /** Opens [WebviewActivity], passing the clicked bean as the "articleBean" extra. */
    override fun onClick(articleBean: ArticleBean, position: Int) {
        val intent = Intent(this, WebviewActivity::class.java)
        intent.putExtra("articleBean", articleBean)
        startActivity(intent)
    }
}

adapter:

/**
 * RecyclerView adapter that renders a list of [ArticleBean] rows.
 *
 * @param items the entries to display
 * @param content context handed to Glide when loading avatars
 */
class ArticleAdapter(var items: List<ArticleBean>, val content: Context): RecyclerView.Adapter<ArticleAdapter.ViewHolder>() {

    private var onItemClick: OnItemClick? = null

    /** Registers the callback invoked when a row is clicked. */
    fun setOnItenClick(onItemClick: OnItemClick) {
        this.onItemClick = onItemClick
    }

    override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): ViewHolder {
        val view = LayoutInflater.from(parent.context).inflate(R.layout.article_item_layout, parent, false)
        val holder = ViewHolder(view)
        // Always install the view listener and look the callback up at click time, so a
        // callback registered after this holder was created still receives clicks.
        holder.itemView.setOnClickListener {
            val position = holder.adapterPosition
            // adapterPosition is NO_POSITION when the holder no longer maps to an item;
            // indexing the list with it would throw.
            if (position == RecyclerView.NO_POSITION) return@setOnClickListener
            onItemClick?.onClick(items[position], position)
        }
        return holder
    }

    override fun getItemCount(): Int = items.size

    override fun onBindViewHolder(holder: ViewHolder, position: Int) {
        val item = items[position]

        // Avatar is loaded with a circular crop.
        Glide.with(content).load(item.avatar).apply(RequestOptions.circleCropTransform()).into(holder.ivAvatar)

        holder.tvTitle.text = item.title
        holder.tvContent.text = item.desc
        holder.tvWatchers.text = "${item.watchers}"
        holder.tvCollect.text = "${item.collect}"
        holder.tvFork.text = "${item.forkNum}"
        holder.tvDate.text = item.date
    }

    /** Caches the child views of one row of R.layout.article_item_layout. */
    class ViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        val ivAvatar: ImageView = itemView.findViewById(R.id.iv_avatar)
        val tvTitle: TextView = itemView.findViewById(R.id.tv_title)
        val tvContent: TextView = itemView.findViewById(R.id.tv_content)
        val tvWatchers: TextView = itemView.findViewById(R.id.tv_watchers)
        val tvCollect: TextView = itemView.findViewById(R.id.tv_collect)
        val tvFork: TextView = itemView.findViewById(R.id.tv_fork)
        val tvDate: TextView = itemView.findViewById(R.id.tv_date)
    }

    /** Callback for row clicks. */
    interface OnItemClick{
        /**
         * @param articleBean the entry shown in the clicked row
         * @param position adapter position of the clicked row
         */
        fun onClick(articleBean: ArticleBean, position: Int)
    }
}

有什么不懂的,留言告诉我

上一篇下一篇

猜你喜欢

热点阅读