android利用jsoup抓取数据
2018-10-16 本文已影响0人
Peakmain
效果图


首先分析html:

添加依赖
implementation 'com.squareup.okhttp3:okhttp:3.11.0'
implementation 'org.jsoup:jsoup:1.10.2'
implementation 'com.github.bumptech.glide:glide:3.7.0'
使用OkHttp请求url,获取html字符串
mRecyclerView = findViewById(R.id.main_list);
// Build a GET request for the page to scrape. If the target url required
// parameters to be submitted, this would have to become a POST carrying them.
Request request = new Request.Builder()
        .url("http://www.jianshu.com")
        .get()
        .build();
Call call = okHttpClient.newCall(request);
// Run the request through OkHttp's callback API; the result arrives in the Callback below.
call.enqueue(new Callback() {
    @Override
    public void onFailure(Call call, IOException e) {
        // Pass the exception itself so the stack trace is logged, not just its message.
        Log.e("TAG", e.toString(), e);
    }

    @Override
    public void onResponse(Call call, Response response) throws IOException {
        try {
            if (!response.isSuccessful()) {
                // Report non-2xx responses instead of dropping them silently.
                Log.e("TAG", "Unexpected HTTP response: " + response.code());
                return;
            }
            String result = response.body().string();
            // Parse the downloaded html.
            parseHtml(result);
        } finally {
            // Always release the response body, including on the unsuccessful
            // path, so the underlying connection is not leaked.
            response.close();
        }
    }
});
解析Html
/**
 * Number of leading characters dropped from an image {@code src} value.
 * NOTE(review): presumably the length of a "//upload-images.jianshu.io" host
 * prefix that the adapter re-adds via its own base url — confirm against the page markup.
 */
private static final int IMAGE_SRC_PREFIX_LENGTH = 26;

/**
 * Parses the given html and rebuilds {@code data} with one {@link JsoupBean}
 * per {@code li} found under {@code div#list-container ul}.
 *
 * <p>Any piece of an item that is missing from the markup (author, title,
 * image, abstract) is stored as an empty string instead of aborting the
 * whole parse with an exception.
 *
 * @param html the raw html text of the page
 */
private void parseHtml(String html) {
    // Turn the html text into a Document.
    Document document = Jsoup.parse(html);
    // Collect the li elements, one per article.
    Elements elements = document.select("div#list-container ul li");
    data = new ArrayList<>();
    for (Element element : elements) {
        // The title anchor supplies both the title text and the detail-page href.
        Element titleLink = element.select("a.title").first();
        String targetUrl = titleLink != null ? titleLink.attr("href") : "";

        JsoupBean homeBean = new JsoupBean();
        homeBean.setAuthor(textOrEmpty(element.select("div.meta a").first()));
        homeBean.setTitle(textOrEmpty(titleLink));
        homeBean.setImageUrl(extractImagePath(element));
        homeBean.setArticleUrl(articleBaseUrl + targetUrl);
        homeBean.setContent(textOrEmpty(element.select("p.abstract").first()));
        data.add(homeBean);
    }
}

/** Returns the element's text, or an empty string when the element is absent. */
private String textOrEmpty(Element element) {
    return element != null ? element.text() : "";
}

/**
 * Returns the {@code src} of the first child of the item's {@code a.wrap-img}
 * with its first {@link #IMAGE_SRC_PREFIX_LENGTH} characters removed, or an
 * empty string when the item has no image or the value is too short.
 */
private String extractImagePath(Element item) {
    // An article may have no image at all, so every step is guarded.
    Element wrapper = item.select("a.wrap-img").first();
    if (wrapper == null) {
        return "";
    }
    Element image = wrapper.children().first();
    if (image == null) {
        return "";
    }
    String src = image.attr("src");
    return src.length() >= IMAGE_SRC_PREFIX_LENGTH
            ? src.substring(IMAGE_SRC_PREFIX_LENGTH)
            : "";
}
实体类
// Image path of the article; the parser stores an empty string when the
// article has no image, and the adapter prepends its base image url on load.
private String imageUrl;
// Article title, taken from the text of the item's "a.title" element.
private String title;
// Article detail url: the parser's base article url followed by the title link's href.
private String articleUrl;
// Author name, taken from the text of the first "div.meta a" element.
private String author;
// Article summary, taken from the text of the item's "p.abstract" element.
private String content;
.....
JsoupAdpater适配器
public class JsoupAdpater extends RecyclerView.Adapter<JsoupAdpater.ViewHolder> {
private final List<JsoupBean> data;
private final Context mContext;
private String baseImageUrl = "https://upload-images.jianshu.io";
public JsoupAdpater(Context context, List<JsoupBean> data) {
this.mContext = context;
this.data = data;
}
@NonNull
@Override
public ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
View view = View.inflate(parent.getContext(), R.layout.item_jsoup, null);
return new ViewHolder(view);
}
@Override
public void onBindViewHolder(@NonNull ViewHolder holder, int position) {
final JsoupBean homeBean = data.get(position);
holder.tv_text.setText(homeBean.getContent());
holder.tv_name.setText(homeBean.getTitle());
holder.tv_author.setText(homeBean.getAuthor());
if (!TextUtils.isEmpty(homeBean.getImageUrl()))
Glide.with(mContext).load(baseImageUrl + homeBean.getImageUrl()).into(holder.iv_logo);
holder.itemView.setOnClickListener(new View.OnClickListener() {
@Override
public void onClick(View v) {
Toast.makeText(mContext, homeBean.getArticleUrl(), Toast.LENGTH_SHORT).show();
}
});
}
@Override
public int getItemCount() {
return data.size();
}
public class ViewHolder extends RecyclerView.ViewHolder {
TextView tv_text;
TextView tv_name;
ImageView iv_logo;
TextView tv_author;
public ViewHolder(View itemView) {
super(itemView);
tv_name = itemView.findViewById(R.id.tv_name);
iv_logo = itemView.findViewById(R.id.iv_logo);
tv_text = itemView.findViewById(R.id.tv_text);
tv_author = itemView.findViewById(R.id.tv_author);
}
}
}