B站视频下载(VideoHelper)

2018-10-27  本文已影响0人  Asche

继续上次的知乎爬虫, 这次开始了哔哩哔哩的爬虫实践;

首先介绍下如何下载吧: VideoHelper 里面有三种方式下载b站视频。

同样的流程, 还是先抓包, 分析参数, 寻找参数(包括之前的请求包和页面源码), 找出视频真实地址, 然后再模拟请求。

抓包时注意几个参数:

aid:每个视频都会有对应的 aid, 包括ep类型的;

cid:弹幕的id, 通过相关api可由cid找到对应的资源列表

ep_id: 就是地址栏上显示的ep类型的id了

这里详细的流程我就不介绍了(其实我是来宣传VideoHelper 的,目前还支持知乎等网站视频, 欢迎star。滑稽‘(>﹏<))

其中需要注意的是, 模拟发包时有些请求头是不能丢掉的: user-agent我就不说了, 再比如Referer;

另外我发现网上目前仅存的b站的视频爬虫好像大多不支持ep类型的, 不过我那个最近测试是支持了的, 但是vip专属的也是会直接报错;

另外注明:该项目参考了you-get的部分api

下面老规矩贴上主要源码:

package website;

import bean.BilibiliBean;
import bean.VideoBean;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import util.DownloadUtil;
import util.HttpUtil;
import util.MD5Encoder;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.*;

import static util.PrintUtil.println;

/**

// NOTE(review): `urls` is used by the methods below (urls.add(...), download(urls, ...))
// but this declaration is commented out, so it is presumably declared elsewhere — confirm.
// private List<String> urls = new ArrayList<>();
// Page URL of the video, as passed in by the caller.
private String playUrl;
// Video title; also used as the base of the output file name.
private String fileName;
// Video duration as reported by the play-url API (presumably milliseconds: it is divided by 1000 before display).
private int timeLength;
// Sum of the "size" values of all video segments (presumably bytes: it is divided by 1024 twice and shown as "M").
private int fileSize = 0;
// Numeric id parsed from an "av<digits>" URL segment.
private int aid;
// Content id read from the part list (av) or the page state (ep); used to build the play-url request.
private int cid;

// Video types
private final int AV_VIDEO = 1;
private final int EP_VIDEO = 2;
private final int SS_VIDEO = 3;

// Type detected from the URL; stays AV_VIDEO when no segment matches.
private int type = AV_VIDEO;
// Set to true once a URL segment matched one of the av/ep/ss patterns.
private boolean isSupported;

// Episodes of the series that the current ep belongs to
private List<BilibiliBean> serialList = new ArrayList<>();

// Whether the video information has already been resolved
private boolean isResolved;


// Creates an instance without resolving anything; resolution then happens in downloadByUrl.
public Bilibili() {
}

/**
 * Resolves the video information for {@code playUrl} up front, so the caller can
 * inspect it (title, duration, size, related episodes) before deciding to download.
 *
 * <p>The URL is split on "/" and the first segment matching {@code av<digits>},
 * {@code ep<digits>} or {@code ss<digits>} (at least four digits) selects the video type.
 * If no segment matches, the URL is reported as unsupported and nothing is requested.
 *
 * <p>Failures during resolution are printed and leave the instance unresolved, so a
 * later {@code downloadByUrl} call attempts the resolution again instead of silently
 * "downloading" an empty list.
 *
 * @param playUrl   page URL of the video
 * @param outputDir not used by this constructor; kept for signature compatibility
 */
public Bilibili(String playUrl, String outputDir) {
    this.playUrl = playUrl;

    for (String segment : playUrl.split("/")) {
        if (segment.matches("av\\d{4,}")) {
            aid = Integer.parseInt(segment.substring(2));
            isSupported = true;
            break;
        } else if (segment.matches("ep\\d{4,}")) {
            type = EP_VIDEO;
            isSupported = true;
            break;
        } else if (segment.matches("ss\\d{4,}")) {
            type = SS_VIDEO;
            isSupported = true;
            break;
        }
    }

    // Without a recognised id there is nothing meaningful to request.
    if (!isSupported) {
        println("Bilibili: unsupported url: " + playUrl);
        return;
    }

    try {
        switch (type) {
            case SS_VIDEO:
            case EP_VIDEO:
                initEp();

                String epApi = generateEpApi(EpApi, cid, quality);
                println(epApi);

                parseEpApiResponse(epApi);
                break;
            case AV_VIDEO:
                initAv();

                String avApi = generateAvApi(AvApi, cid, quality);
                println(avApi);

                parseAvApiResponse(avApi);
                break;
        }
        // Only mark as resolved when every step above succeeded.
        isResolved = true;
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Resolves the video behind {@code playUrl} (unless this instance is already resolved),
 * prints a short summary and downloads all segments into {@code outputDir}.
 *
 * <p>The URL is split on "/" and the first segment matching {@code av<digits>},
 * {@code ep<digits>} or {@code ss<digits>} (at least four digits) selects the video type.
 * If no supported id has been recognised, the method reports it and returns without
 * issuing any request. Exceptions raised while resolving or downloading are printed,
 * not rethrown.
 *
 * @param playUrl   page URL of the video
 * @param outputDir directory the downloaded file(s) are written to
 */
@Override
public void downloadByUrl(String playUrl, String outputDir) {
    println("Bilibili start: ");

    this.playUrl = playUrl;

    for (String segment : playUrl.split("/")) {
        if (segment.matches("av\\d{4,}")) {
            aid = Integer.parseInt(segment.substring(2));
            isSupported = true;
            break;
        } else if (segment.matches("ep\\d{4,}")) {
            type = EP_VIDEO;
            isSupported = true;
            break;
        } else if (segment.matches("ss\\d{4,}")) {
            type = SS_VIDEO;
            isSupported = true;
            break;
        }
    }

    // Previously an unrecognised URL fell through as an AV video with aid 0.
    if (!isSupported) {
        println("Bilibili: unsupported url: " + playUrl);
        return;
    }

    try {

        if (!isResolved) {
            switch (type) {
                case SS_VIDEO:
                case EP_VIDEO:
                    initEp();

                    String epApi = generateEpApi(EpApi, cid, quality);
                    println(epApi);

                    parseEpApiResponse(epApi);
                    break;
                case AV_VIDEO:
                    initAv();

                    String avApi = generateAvApi(AvApi, cid, quality);
                    println(avApi);

                    parseAvApiResponse(avApi);
                    break;
            }
            isResolved = true;
        }

        println("# Title: " + fileName);
        println("     -TimeLength: " + timeLength / 1000 / 60 + ":" + String.format("%02d", timeLength / 1000 % 60));
        println("     -File Size: " + fileSize / 1024 / 1024 + " M");

        download(urls, outputDir);

    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Internal download entry point: fetches every segment URL into {@code outputDir}.
 *
 * <p>A single segment is saved as {@code <title>.flv}; several segments are saved as
 * {@code <title>【n】.flv} with a 1-based index. Characters that are not allowed in
 * file names on common file systems ({@code \ / : * ? " < > |}) are removed from the
 * title, and a placeholder name is used when no title is available.
 *
 * @param videoSrcs segment URLs to download, in order
 * @param outputDir directory the files are written to
 * @throws IOException declared for callers; any I/O failure raised by the download helper passes through
 */
@Override
public void download(List<String> videoSrcs, String outputDir) throws IOException {
    Map<String, List<String>> headerMap = new HashMap<>();
    // A missing Referer header leads to a 453 error
    headerMap.put("Referer", Collections.singletonList("http://interface.bilibili.com/v2/playurl?appkey=84956560bc028eb7&cid=59389212&otype=json&qn=3&quality=3&type=&sign=4c841d687bb7e479e3111428c6a4d3b8"));

    // Sanitise once: the title does not change between segments.
    String safeName = fileName == null ? "" : fileName.replaceAll("[\\\\/:*?\"<>|]", "");
    if (safeName.isEmpty()) {
        safeName = "bilibili_video";
    }
    String basePath = outputDir + File.separatorChar + safeName;

    int total = videoSrcs.size();
    int index = 0;

    for (String src : videoSrcs) {
        index++;
        println("Download: " + index + "/" + total);

        String filePath;
        if (total == 1) {
            filePath = basePath + ".flv";
        } else {
            filePath = basePath + "【" + index + "】.flv";
        }

        DownloadUtil.downloadVideo(src, filePath, headerMap);
    }
    println("Download: All Done!");
}

/**
 * Packs the currently known title, duration and size into a {@link VideoBean}.
 *
 * <p>The duration is rendered as {@code minutes:ss}; the size is the accumulated
 * segment size divided by 1024 twice.
 *
 * @return a new bean filled from this instance's fields
 */
@Override
public VideoBean getInfo() {
    int totalSeconds = timeLength / 1000;
    int minutes = totalSeconds / 60;
    String seconds = String.format("%02d", totalSeconds % 60);
    int sizeInMb = fileSize / 1024 / 1024;

    VideoBean info = new VideoBean();
    info.setTitle(fileName);
    info.setTimeLength(minutes + ":" + seconds);
    info.setSize(sizeInMb);
    return info;
}

/**
 * Returns the episodes collected while resolving an ep/ss video.
 *
 * <p>The internal list itself is returned, not a copy, so changes made by the
 * caller are visible to this instance.
 *
 * @return the list of related episodes; empty when none have been collected
 */
public List<BilibiliBean> getSerialList(){
    return serialList;
}

/**
 * Initialises {@code cid} and {@code fileName} for an av-type video.
 *
 * <p>The cid is taken from the first entry of the part list returned for the current
 * aid. The title is read from the {@code h1} inside {@code div#viewbox_report} on the
 * video page: its {@code title} attribute when present, otherwise its text. When the
 * page offers no usable title, {@code "av" + aid} is used so later steps never see a
 * null file name.
 *
 * @throws IOException if a request fails or the part list is empty
 */
private void initAv() throws IOException {
    String result = HttpUtil.getResponseContent(ApiGetList + aid);
    JSONArray parts = new JSONArray(result);
    if (parts.length() == 0) {
        throw new IOException("Empty part list returned for av" + aid);
    }
    cid = parts.getJSONObject(0).getInt("cid");

    Document doc = Jsoup.connect(playUrl).get();

    // The page layout may change; do not assume either element exists.
    Element report = doc.selectFirst("div[id=viewbox_report]");
    Element heading = report == null ? null : report.selectFirst("h1");
    if (heading != null) {
        String title = heading.hasAttr("title") ? heading.attr("title") : heading.text();
        if (!title.isEmpty()) {
            fileName = title;
        }
    }

    if (fileName == null) {
        fileName = "av" + aid;
    }
}

/**
 * Initialises {@code cid}, {@code fileName} and the list of related episodes for an
 * ep/ss-type video.
 *
 * <p>The third child element of the page body is expected to carry the page state as
 * JSON, located between the markers {@code __=} and {@code ;(function()}. From that
 * JSON the method reads {@code epInfo.cid}, {@code mediaInfo.title} and every entry of
 * {@code epList}. The episode list is rebuilt from scratch on each call, so running the
 * initialisation again does not produce duplicates.
 *
 * @throws IOException if the page cannot be fetched or does not contain the expected
 *                     state block
 */
private void initEp() throws IOException {
    Document doc = Jsoup.connect(playUrl).get();
    Element body = doc.body();
    if (body.children().size() < 3) {
        throw new IOException("Unexpected page layout, state block not found: " + playUrl);
    }

    String scriptHtml = body.child(2).toString();

    // Locate the JSON between the two markers; fail clearly when either is absent.
    int markerPos = scriptHtml.indexOf("__=");
    int jsonStart = markerPos + 3;
    int jsonEnd = markerPos < 0 ? -1 : scriptHtml.indexOf(";(function()", jsonStart);
    if (markerPos < 0 || jsonEnd < 0) {
        throw new IOException("Page state markers not found: " + playUrl);
    }

    JSONObject state = new JSONObject(scriptHtml.substring(jsonStart, jsonEnd));

    JSONObject curEpInfo = state.getJSONObject("epInfo");

    fileName = state.getJSONObject("mediaInfo").getString("title");

    cid = curEpInfo.getInt("cid");

    serialList.clear();

    for (Object item : state.getJSONArray("epList")) {
        JSONObject epObject = (JSONObject) item;

        // Locals are prefixed with "ep" so they do not shadow the aid/cid fields.
        int epAid = epObject.getInt("aid");
        int epCid = epObject.getInt("cid");
        int duration = epObject.getInt("duration");
        int epId = epObject.getInt("ep_id");

        String index = epObject.getString("index");
        String indexTitle = epObject.getString("index_title");

        BilibiliBean bean = new BilibiliBean(epAid, epCid, duration, epId, index, indexTitle);

        serialList.add(bean);

        println(bean.toString());
    }
}

/**
 * Requests the av play-url API and fills {@code timeLength}, {@code fileSize} and
 * {@code urls} from its JSON answer.
 *
 * <p>{@code timelength} is read from the top-level object; for every entry of the
 * {@code durl} array the {@code url} is appended to {@code urls} and the {@code size}
 * is added to {@code fileSize}.
 *
 * @param avReqApi fully built (signed) request URL
 * @throws IOException if the request fails
 */
private void parseAvApiResponse(String avReqApi) throws IOException {
    JSONObject payload = new JSONObject(HttpUtil.getResponseContent(avReqApi));

    timeLength = payload.getInt("timelength");

    for (Object entry : payload.getJSONArray("durl")) {
        JSONObject segment = (JSONObject) entry;

        urls.add(segment.getString("url"));
        fileSize += segment.getInt("size");
    }
}

/**
 * Requests the ep play-url API and fills {@code timeLength}, {@code fileSize} and
 * {@code urls} from its XML answer.
 *
 * <p>{@code <timelength>} is read from the root element; for every {@code <durl>}
 * child the {@code <size>} is added to {@code fileSize} and the {@code <url>} is
 * appended to {@code urls}. An answer without {@code <timelength>} is reported as an
 * {@link IOException} carrying the raw response instead of failing with a
 * NullPointerException.
 *
 * <p>The XML comes from a remote server, so DOCTYPE declarations are rejected to rule
 * out external-entity (XXE) processing.
 *
 * @param epReqApi fully built (signed) request URL
 * @throws IOException       if the request fails or the answer lacks the expected data
 * @throws DocumentException if the answer is not well-formed XML or the parser cannot
 *                           be configured
 */
private void parseEpApiResponse(String epReqApi) throws IOException, DocumentException {
    String response = HttpUtil.getResponseContent(epReqApi);

    SAXReader reader = new SAXReader();
    try {
        reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    } catch (org.xml.sax.SAXException e) {
        throw new DocumentException("Cannot disable DOCTYPE processing on the XML parser", e);
    }

    byte[] xmlBytes = response.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    org.dom4j.Element rootElement = reader.read(new ByteArrayInputStream(xmlBytes)).getRootElement();

    org.dom4j.Element timeLengthElement = rootElement.element("timelength");
    if (timeLengthElement == null) {
        throw new IOException("Play-url response has no <timelength> element: " + response);
    }
    timeLength = Integer.parseInt(timeLengthElement.getText().trim());

    List<org.dom4j.Element> elements = rootElement.elements("durl");

    for (org.dom4j.Element ele : elements) {
        // Trimmed like timelength above, so surrounding whitespace cannot break parsing.
        int curSize = Integer.parseInt(ele.element("size").getText().trim());
        fileSize += curSize;

        String url = ele.element("url").getText();
        urls.add(url);
    }

    println(fileName + ": " + fileSize / 1024 / 1024 + "M");
}

/**
 * Builds the signed request URL for the play-url API of an av-type video.
 *
 * <p>The query string is formatted with {@link Locale#ROOT} so the numbers are always
 * written with ASCII digits, whatever the default locale of the JVM is; the signature
 * is the lower-cased MD5 of the query string followed by the secret.
 *
 * @param url     API base URL the query string is appended to
 * @param cid     content id of the video
 * @param quality requested quality, sent as both {@code qn} and {@code quality}
 * @return the complete request URL, or {@code null} if the signature could not be computed
 */
private String generateAvApi(String url, int cid, int quality) {
    String paramStr = String.format(Locale.ROOT,
            "appkey=84956560bc028eb7&cid=%d&otype=json&qn=%d&quality=%d&type=", cid, quality, quality);
    try {
        String checkSum = MD5Encoder.md5(paramStr + SEC_1).toLowerCase(Locale.ROOT);
        return url + paramStr + "&sign=" + checkSum;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

/**
 * Builds the signed request URL for the play-url API of an ep-type video.
 *
 * <p>The query string carries the current Unix time in seconds as {@code ts}. It is
 * formatted with {@link Locale#ROOT} so the numbers are always written with ASCII
 * digits, whatever the default locale of the JVM is; the signature is the lower-cased
 * MD5 of the query string followed by the secret.
 *
 * @param url     API base URL the query string is appended to
 * @param cid     content id of the episode
 * @param quality requested quality
 * @return the complete request URL, or {@code null} if the signature could not be computed
 */
private String generateEpApi(String url, int cid, int quality) {
    String timestamp = Long.toString(System.currentTimeMillis() / 1000);
    String paramStr = String.format(Locale.ROOT, "cid=%d&module=bangumi&player=1&quality=%d&ts=%s",
            cid, quality, timestamp);
    try {
        String checkSum = MD5Encoder.md5(paramStr + SEC_2).toLowerCase(Locale.ROOT);
        return url + paramStr + "&sign=" + checkSum;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

}

完整代码位于:

https://github.com/asche910/VideoHelper

上一篇下一篇

猜你喜欢

热点阅读