Java 之旅

中级04 - Java Web

2019-08-26  本文已影响0人  晓风残月1994

一、Java Web初体验

1. 使用Java发送HTTP请求

可以借助第三方库,比如 httpclient,接着使用 IOUtils 操作 IO 流将消息实体转化为 HTML 字符串:

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.IOException;
import java.io.InputStream;

/**
 * Demo: sends an HTTP GET request with Apache HttpClient and prints the
 * status line and the response body.
 */
public class HTTPRequest {

    /**
     * Requests {@code http://localhost:8080/auth} and prints the status line
     * followed by the response body decoded as UTF-8.
     *
     * @param args unused
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpGet = new HttpGet("http://localhost:8080/auth");
        // Also disguise ourselves as a regular browser.
        httpGet.addHeader("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36");
        // try-with-resources closes the response first and then the client,
        // even when an exception is thrown while reading the body.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpGet)) {
            System.out.println(response.getStatusLine());
            HttpEntity entity = response.getEntity();
            // Responses without a body carry no entity; nothing to print then.
            if (entity != null) {
                InputStream is = entity.getContent();
                String html = IOUtils.toString(is, "UTF-8");
                System.out.println(html);
                // Make sure the body is fully consumed before the response is closed.
                EntityUtils.consume(entity);
            }
        }
    }

}

2. 使用爬虫抓取指定仓库中的Pull request信息

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Demo crawler that scrapes the first page of pull requests of a GitHub
 * repository using jsoup.
 */
public class Crawler {
    // Number of leading characters stripped from a row's "id" attribute before
    // the remainder is parsed as the pull request number.
    // NOTE(review): presumably the length of an "issue_" prefix — confirm against the page markup.
    private static final int ROW_ID_PREFIX_LENGTH = 6;

    /** Plain holder for the data scraped from one pull request row. */
    static class GitHubPullRequest {
        // Number of the pull request
        int number;
        // Title of the pull request
        String title;
        // GitHub id of the pull request's author
        String author;

        GitHubPullRequest(int number, String title, String author) {
            this.number = number;
            this.title = title;
            this.author = author;
        }

        /** Returns number, title and author, each followed by a newline. */
        @Override
        public String toString() {
            return number + "\n" + title + "\n" + author + "\n";
        }
    }

    /**
     * Fetches {@code https://github.com/<repo>/pulls} and extracts one entry per
     * element matching {@code .js-issue-row}. Each entry is also printed to
     * standard output.
     *
     * @param repo repository name, for example "golang/go" or "gradle/gradle"
     * @return the pull requests found on the first page, in document order
     * @throws IOException if the page cannot be fetched
     */
    public static List<GitHubPullRequest> getFirstPageOfPullRequests(String repo) throws IOException {
        Document doc = Jsoup.connect("https://github.com/" + repo + "/pulls").get();
        List<Element> rows = doc.select(".js-issue-row");
        List<GitHubPullRequest> results = new ArrayList<>();
        for (Element row : rows) {
            int number = Integer.parseInt(row.attr("id").substring(ROW_ID_PREFIX_LENGTH));
            String title = row.select(".js-navigation-open").get(0).text();
            String author = row.select(".muted-link").get(0).text();
            GitHubPullRequest pr = new GitHubPullRequest(number, title, author);
            System.out.println(pr);
            results.add(pr);
        }
        return results;
    }

    /**
     * Prints the first page of pull requests of {@code golang/go}.
     *
     * @param args unused
     * @throws IOException if the page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        getFirstPageOfPullRequests("golang/go");
    }
}

二、关于HTTP

HTTP是互联网世界的基石。

2. HTTP常用方法

3. HTTP Status

彩蛋1:
https://http.cat/
彩蛋2:
HTTP Status 418
彩蛋3:
HTTP Status 6xx

4. HTTP重要的请求header

5. HTTP重要的响应header

6. HTTP body

7. 其它

HTTP 消息头的字段名是不区分大小写的。
HTTP 是无状态的,每个请求之间都是相互独立的;最常用的做法是使用 cookie 这一客户端存储技术来持久化客户端状态(参见 HTTP cookies - MDN)。

1. 模拟登录后携带cookie获取用户信息

import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.BasicCookieStore;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

/**
 * Demo: logs in with a JSON POST and then reuses the cookies set by the
 * server to fetch the authenticated user's information.
 */
public class Crawler {

    /**
     * Posts the credentials as JSON to {@code http://example.com/auth/login},
     * then requests {@code http://example.com/auth} with the same client (and
     * therefore the same cookie store) and returns that response body. The
     * body is also printed to standard output.
     *
     * @param username login name sent in the JSON body
     * @param password password sent in the JSON body
     * @return the body of the {@code /auth} response decoded as UTF-8
     * @throws IOException if either request fails or the body cannot be read
     */
    public static String loginAndGetResponse(String username, String password) throws IOException {
        Map<String, String> credentials = new HashMap<>();
        credentials.put("username", username);
        credentials.put("password", password);

        HttpPost httpPost = new HttpPost("http://example.com/auth/login");
        // Encode the JSON explicitly as UTF-8: the single-argument StringEntity
        // constructor falls back to ISO-8859-1 and corrupts non-Latin characters.
        httpPost.setEntity(new StringEntity(JSON.toJSONString(credentials), "UTF-8"));
        httpPost.setHeader("Content-Type", "application/json");
        httpPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36");

        // Build (via the builder pattern) a client that stores cookies; closing it
        // through try-with-resources releases its connections on every path.
        try (CloseableHttpClient httpclient =
                     HttpClients.custom().setDefaultCookieStore(new BasicCookieStore()).build()) {
            // Log in so that the server sets the session cookie; the body is not needed.
            httpclient.execute(httpPost).close();

            // Carry the cookie along to fetch the auth information.
            HttpGet httpGet = new HttpGet("http://example.com/auth");
            try (CloseableHttpResponse authResponse = httpclient.execute(httpGet)) {
                InputStream is = authResponse.getEntity().getContent();
                String authResponseBody = IOUtils.toString(is, "UTF-8");
                System.out.println(authResponseBody);
                return authResponseBody;
            }
        }
    }

    /**
     * Runs the login flow with the demo credentials {@code admin}/{@code admin}.
     *
     * @param args unused
     * @throws IOException if a request fails
     */
    public static void main(String[] args) throws IOException {
        loginAndGetResponse("admin", "admin");
    }

}
上一篇 下一篇

猜你喜欢

热点阅读