让前端飞Web前端之路技术干货

使用 Dart 爬取妹子图和豆瓣影评

2019-10-08  本文已影响0人  luacoding

目的

Dart 服务端爬虫实践

目标网站

使用库

功能

代码

妹子图

根据目标页面元素,获取图片地址

/// Scrapes one listing page of mzitu.com and downloads every image found.
///
/// [page] selects the listing page: `1` requests the site root, any other
/// value requests `/page/<page>/`. For each `#pins > li > a > img` element
/// the `data-original` attribute is used as the image URL and the `alt` text
/// (with spaces removed) as the file name handed to [Download.image].
///
/// The returned future completes only after every download started for the
/// page has finished. Failures are printed, not rethrown, so the scraper
/// stays best-effort.
Future<void> getImage([page = 1]) async {
  try {
    var headers = {
      'User-Agent':
          'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
    };
    // Parse up front so the call works whether `http.get` takes a Uri or
    // also accepts a String.
    var uri = Uri.parse(page == 1
        ? 'https://www.mzitu.com'
        : 'https://www.mzitu.com/page/$page/');
    var res = await http.get(uri, headers: headers);
    if (res.statusCode != 200) {
      print('getImage: unexpected status ${res.statusCode} for page $page');
      return;
    }

    Document dom = parse(res.body);
    var downloads = <Future>[];
    for (var v in dom.querySelectorAll('#pins > li > a > img')) {
      var url = v.attributes['data-original'];
      var alt = v.attributes['alt'];
      // One malformed thumbnail must not abort the rest of the page.
      if (url == null || alt == null) continue;
      downloads.add(Download.image(
          url, alt.replaceAll(' ', ''), {'Referer': 'https://www.mzitu.com/'}));
    }
    // Downloads still run concurrently, but are now awaited so callers can
    // tell when the page is actually done.
    await Future.wait(downloads);
  } catch (e) {
    print(e);
  }
}

图片下载

下载妹子图的图片时需要设置 Referer 请求头

/// Helpers for saving remote resources to disk.
class Download {
  /// Downloads the image at [url] and writes it to `./img/<fileName>.png`.
  ///
  /// [url] may be a `String` or a `Uri`. [headers] are sent with the request
  /// (for example a `Referer`). The response body is decoded and re-encoded
  /// as PNG before being written.
  ///
  /// Nothing is written when the response status is not 200 or the body
  /// cannot be decoded as an image. Errors are printed, not rethrown.
  static Future<void> image(url, fileName,
      [Map<String, String> headers]) async {
    try {
      // Parse up front so the call works whether `http.get` takes a Uri or
      // also accepts a String.
      var uri = url is Uri ? url : Uri.parse('$url');
      var res = await http.get(uri, headers: headers);
      if (res.statusCode != 200) {
        print('Download.image: unexpected status ${res.statusCode} for $url');
        return;
      }

      var image = img.decodeImage(res.bodyBytes);
      if (image == null) {
        print('Download.image: could not decode image from $url');
        return;
      }

      // The name usually comes from scraped page text; strip path separators
      // so it cannot point outside the output directory.
      var safeName = '$fileName'.replaceAll(RegExp(r'[/\\]'), '_');
      // Create the output directory on first use instead of failing the write.
      await Directory('./img').create(recursive: true);
      await File('./img/$safeName.png').writeAsBytes(img.encodePng(image));
    } catch (e) {
      print(e);
    }
  }
}

豆瓣

/// Fetches one page of reviews for Douban movie 3882715 and prints each one.
///
/// [page] is 1-based: page 1 requests `start=0`, and each following page
/// advances the offset by 20. Values below 1 are treated as the first page.
///
/// Each review is printed as a map with the keys `name`, `avatar`, `content`
/// and `time`. A missing text element yields an empty string and a missing
/// avatar yields `null`, so one incomplete review does not stop the rest.
/// Errors are printed, not rethrown.
Future<void> douban([page = 1]) async {
  try {
    var headers = {
      'User-Agent':
          "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    };
    // The first page starts at offset 0; the previous `20 * page` skipped the
    // first 20 reviews when called with the default page.
    var start = page > 1 ? 20 * (page - 1) : 0;
    var uri = Uri.parse(
        'https://movie.douban.com/subject/3882715/reviews?start=$start');
    var res = await http.get(uri, headers: headers);
    if (res.statusCode != 200) {
      print('douban: unexpected status ${res.statusCode} for page $page');
      return;
    }

    Document dom = parse(res.body);
    for (var v in dom.querySelectorAll('.main.review-item')) {
      // Trimmed text of the first match inside this review, or '' if absent.
      String textOf(String selector) {
        var node = v.querySelector(selector);
        return node == null ? '' : node.text.trim();
      }

      // The `.avator` spelling is the selector the page is matched against;
      // do not "correct" it.
      var avatarImg = v.querySelector('.avator img');
      print({
        'name': textOf('.name'),
        'avatar': avatarImg == null ? null : avatarImg.attributes['src'],
        'content': textOf('.short-content'),
        'time': textOf('.main-meta'),
      });
    }
  } catch (e) {
    print(e);
  }
}

展示

列表 图片

代码

github

上一篇下一篇

猜你喜欢

热点阅读